From 696fcc309bd3a38e34f188a638f969aaf1025d2f Mon Sep 17 00:00:00 2001
From: Vojtech Pavlik <vojtech@twilight.ucw.cz>
Date: Thu, 4 Jul 2002 01:38:31 +0200
Subject: Minor fixes to make the whole thing compile on latest 2.5 and kbuild2

---
 include/linux/input.h | 104 +++++++++++++++++++++++++-------------------------
 1 file changed, 53 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index aa5e80d60330..76130b47a59a 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -2,7 +2,7 @@
 #define _INPUT_H
 
 /*
- * $Id: input.h,v 1.57 2002/01/02 11:59:56 vojtech Exp $
+ * $Id: input.h,v 1.68 2002/05/31 10:35:49 fsirl Exp $
  *
  *  Copyright (c) 1999-2001 Vojtech Pavlik
  */
@@ -322,12 +322,13 @@ struct input_event {
 #define KEY_FINANCE		219
 #define KEY_SPORT		220
 #define KEY_SHOP		221
-
-#define KEY_UNKNOWN		240
-
+#define KEY_ALTERASE		222
+#define KEY_CANCEL		223
 #define KEY_BRIGHTNESSDOWN	224
 #define KEY_BRIGHTNESSUP	225
 
+#define KEY_UNKNOWN		240
+
 #define BTN_MISC		0x100
 #define BTN_0			0x100
 #define BTN_1			0x101
@@ -394,6 +395,10 @@ struct input_event {
 #define BTN_STYLUS		0x14b
 #define BTN_STYLUS2		0x14c
 
+#define BTN_WHEEL		0x150
+#define BTN_GEAR_DOWN		0x150
+#define BTN_GEAR_UP		0x151
+
 #define KEY_MAX			0x1ff
 
 /*
@@ -514,23 +519,19 @@ struct input_event {
  * Structures used in ioctls to upload effects to a device
  * The first structures are not passed directly by using ioctls.
  * They are sub-structures of the actually sent structure (called ff_effect)
- *
- * Ranges:
- *  0 <= __u16 <= 65535
- *  -32767 <= __s16 <= +32767     ! Not -32768 for lower bound !
  */
 
 struct ff_replay {
-	__u16 length;		/* Duration of an effect in ms. All other times are also expressed in ms */
-	__u16 delay;		/* Time to wait before to start playing an effect */
+	__u16 length; /* Duration of an effect in ms. All other times are also expressed in ms */
+	__u16 delay;  /* Time to wait before starting to play an effect */
 };
 
 struct ff_trigger {
-	__u16 button;		/* Number of button triggering an effect */
-	__u16 interval;		/* Time to wait before an effect can be re-triggered (ms) */
+	__u16 button;   /* Number of button triggering an effect */
+	__u16 interval; /* Time to wait before an effect can be re-triggered (ms) */
 };
 
-struct ff_shape {
+struct ff_envelope {
 	__u16 attack_length;	/* Duration of attack (ms) */
 	__u16 attack_level;	/* Level at beginning of attack */
 	__u16 fade_length;	/* Duration of fade (ms) */
@@ -539,41 +540,56 @@ struct ff_shape {
 
 /* FF_CONSTANT */
 struct ff_constant_effect {
-	__s16 level;		/* Strength of effect. Negative values are OK */
-	struct ff_shape shape;
+	__s16 level;	    /* Strength of effect. Negative values are OK */
+	struct ff_envelope envelope;
 };
 
-/* FF_SPRING of FF_FRICTION */
-struct ff_interactive_effect {
-/* Axis along which effect must be created. If null, the field named direction
- * is used
- * It is a bit array (ie to enable axes X and Y, use BIT(ABS_X) | BIT(ABS_Y)
- * It overrides the value of ff_effect::direction, which is used only if
- * axis == 0
- */
-	__u16 axis;
+/* FF_RAMP */
+struct ff_ramp_effect {
+	__s16 start_level;
+	__s16 end_level;
+	struct ff_envelope envelope;
+};
 
+/* FF_SPRING or FF_FRICTION */
+struct ff_condition_effect {
 	__u16 right_saturation; /* Max level when joystick is on the right */
-	__u16 left_saturation;	/* Max level when joystick in on the left */
+	__u16 left_saturation;  /* Max level when joystick is on the left */
 
 	__s16 right_coeff;	/* Indicates how fast the force grows when the
 				   joystick moves to the right */
 	__s16 left_coeff;	/* Same for left side */
 
-	__u16 deadband;		/* Size of area where no force is produced */
-	__s16 center;		/* Position of dead dead zone */
+	__u16 deadband;	/* Size of area where no force is produced */
+	__s16 center;	/* Position of dead zone */
 
 };
 
 /* FF_PERIODIC */
 struct ff_periodic_effect {
-	__u16 waveform;		/* Kind of wave (sine, square...) */
-	__u16 period;		/* in ms */
+	__u16 waveform;	/* Kind of wave (sine, square...) */
+	__u16 period;	/* in ms */
 	__s16 magnitude;	/* Peak value */
-	__s16 offset;		/* Mean value of wave (roughly) */
+	__s16 offset;	/* Mean value of wave (roughly) */
 	__u16 phase;		/* 'Horizontal' shift */
 
-	struct ff_shape shape;
+	struct ff_envelope envelope;
+
+/* Only used if waveform == FF_CUSTOM */
+	__u32 custom_len;	/* Number of samples  */	
+	__s16 *custom_data;	/* Buffer of samples */
+/* Note: the data pointed to by custom_data is copied by the driver. You can
+ * therefore dispose of the memory after the upload/update */
+};
+
+/* FF_RUMBLE */
+/* Some rumble pads have two motors of different weight.
+   strong_magnitude represents the magnitude of the vibration generated
+   by the heavy motor.
+*/
+struct ff_rumble_effect {
+	__u16 strong_magnitude;  /* Magnitude of the heavy motor */
+	__u16 weak_magnitude;    /* Magnitude of the light one */
 };
 
 /*
@@ -598,26 +614,13 @@ struct ff_effect {
 
 	union {
 		struct ff_constant_effect constant;
+		struct ff_ramp_effect ramp;
 		struct ff_periodic_effect periodic;
-		struct ff_interactive_effect interactive;
+		struct ff_condition_effect condition[2]; /* One for each axis */
+		struct ff_rumble_effect rumble;
 	} u;
 };
 
-/*
- * Buttons that can trigger effects. Use for example FF_BTN(BTN_TRIGGER) to
- * access the bitmap.
- */
-
-#define FF_BTN(x)	((x) - BTN_MISC + FF_BTN_OFFSET)
-#define FF_BTN_OFFSET	0x00
-
-/*
- * Force feedback axis mappings. Use FF_ABS() to access the bitmap.
- */
-
-#define FF_ABS(x)	((x) + FF_ABS_OFFSET)
-#define FF_ABS_OFFSET	0x40
-
 /*
  * Force feedback effect types
  */
@@ -627,6 +630,9 @@ struct ff_effect {
 #define FF_CONSTANT	0x52
 #define FF_SPRING	0x53
 #define FF_FRICTION	0x54
+#define FF_DAMPER	0x55
+#define FF_INERTIA	0x56
+#define FF_RAMP		0x57
 
 /*
  * Force feedback periodic effect types
@@ -668,8 +674,6 @@ struct input_dev {
 	char *name;
 	char *phys;
 	char *uniq;
-	int number;
-
 	unsigned short idbus;
 	unsigned short idvendor;
 	unsigned short idproduct;
@@ -707,8 +711,6 @@ struct input_dev {
 	int absfuzz[ABS_MAX + 1];
 	int absflat[ABS_MAX + 1];
 
-	int only_one_writer;
-
 	int (*open)(struct input_dev *dev);
 	void (*close)(struct input_dev *dev);
 	int (*accept)(struct input_dev *dev, struct file *file);
-- 
cgit v1.2.3


From e7ae11b6d73daaf485d3af3df179d837ed3e0d41 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@zip.com.au>
Date: Thu, 4 Jul 2002 08:30:10 -0700
Subject: [PATCH] per-cpu buffer_head cache

ext2 and ext3 implement a custom LRU cache of buffer_heads - the eight
most-recently-used inode bitmap buffers and the eight MRU block bitmap
buffers.

I don't like them, for a number of reasons:

- The code is duplicated between filesystems

- The functionality is unavailable to other filesystems

- The LRU only applies to bitmap buffers.  And not, say, indirects.

- The LRUs are subtly dependent upon lock_super() for protection:
  without lock_super protection a bitmap could be evicted and freed
  while in use.

  And removing this dependence on lock_super() gets us one step on
  the way toward getting that semaphore out of the ext2 block allocator -
  it causes significant contention under some loads and should be a
  spinlock.

- The LRUs pin 64 kbytes per mounted filesystem.

Now, we could just delete those LRUs and rely on the VM to manage the
memory.  But that would introduce significant lock contention in
__find_get_block - the blockdev mapping's private_lock and page_lock
are heavily used.

So this patch introduces a transparent per-CPU bh lru which is hidden
inside __find_get_block(), __getblk() and __bread().  It is designed to
shorten code paths and to reduce lock contention.  It uses a seven-slot
LRU.  It achieves a 99% hit rate in `dbench 64'.  It provides benefit
to all filesystems.

The next patches remove the open-coded LRUs from ext2 and ext3.

Taken together, these patches are a code cleanup (300-400 lines gone),
and they reduce lock contention.  Anton tested these patches on the
32-way and demonstrated a throughput improvement of up to 15% on
RAM-only dbench runs.  See http://samba.org/~anton/linux/2.5.24/dbench/

Most of this benefit is from avoiding find_get_page() on the blockdev
mapping, because the generic LRU copes with indirect blocks as well as
bitmaps.
---
 fs/buffer.c                 | 172 +++++++++++++++++++++++++++++++++++++++++++-
 include/linux/buffer_head.h |  49 ++++---------
 kernel/ksyms.c              |   3 -
 3 files changed, 184 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index dde8e7d9bae6..d46b55b0cf2c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -36,6 +36,8 @@
 #include <linux/buffer_head.h>
 #include <asm/bitops.h>
 
+static void invalidate_bh_lrus(void);
+
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
 /*
@@ -389,7 +391,7 @@ out:
  * private_lock is contended then so is mapping->page_lock).
  */
 struct buffer_head *
-__find_get_block(struct block_device *bdev, sector_t block, int unused)
+__find_get_block_slow(struct block_device *bdev, sector_t block, int unused)
 {
 	struct inode *bd_inode = bdev->bd_inode;
 	struct address_space *bd_mapping = bd_inode->i_mapping;
@@ -459,6 +461,7 @@ out:
    pass does the actual I/O. */
 void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
 {
+	invalidate_bh_lrus();
 	/*
 	 * FIXME: what about destroy_dirty_buffers?
 	 * We really want to use invalidate_inode_pages2() for
@@ -1159,7 +1162,7 @@ grow_buffers(struct block_device *bdev, unsigned long block, int size)
  * attempt is failing.  FIXME, perhaps?
  */
 struct buffer_head *
-__getblk(struct block_device *bdev, sector_t block, int size)
+__getblk_slow(struct block_device *bdev, sector_t block, int size)
 {
 	for (;;) {
 		struct buffer_head * bh;
@@ -1259,7 +1262,8 @@ void __bforget(struct buffer_head *bh)
  *  Reads a specified block, and returns buffer head that contains it.
  *  It returns NULL if the block was unreadable.
  */
-struct buffer_head * __bread(struct block_device *bdev, int block, int size)
+struct buffer_head *
+__bread_slow(struct block_device *bdev, sector_t block, int size)
 {
 	struct buffer_head *bh = __getblk(bdev, block, size);
 
@@ -1283,6 +1287,165 @@ struct buffer_head * __bread(struct block_device *bdev, int block, int size)
 	return NULL;
 }
 
+/*
+ * Per-cpu buffer LRU implementation, to reduce the cost of __find_get_block().
+ * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
+ * refcount elevated by one when they're in an LRU.  A buffer can only appear
+ * once in a particular CPU's LRU.  A single buffer can be present in multiple
+ * CPUs' LRUs at the same time.
+ *
+ * This is a transparent caching front-end to sb_bread(), sb_getblk() and
+ * sb_find_get_block().
+ */
+
+#define BH_LRU_SIZE	7
+
+static struct bh_lru {
+	spinlock_t lock;
+	struct buffer_head *bhs[BH_LRU_SIZE];
+} ____cacheline_aligned_in_smp bh_lrus[NR_CPUS];
+
+/*
+ * The LRU management algorithm is dopey-but-simple.  Sorry.
+ */
+static void bh_lru_install(struct buffer_head *bh)
+{
+	struct buffer_head *evictee = NULL;
+	struct bh_lru *lru;
+
+	if (bh == NULL)
+		return;
+
+	lru = &bh_lrus[get_cpu()];
+	spin_lock(&lru->lock);
+	if (lru->bhs[0] != bh) {
+		struct buffer_head *bhs[BH_LRU_SIZE];
+		int in;
+		int out = 0;
+
+		get_bh(bh);
+		bhs[out++] = bh;
+		for (in = 0; in < BH_LRU_SIZE; in++) {
+			struct buffer_head *bh2 = lru->bhs[in];
+
+			if (bh2 == bh) {
+				__brelse(bh2);
+			} else {
+				if (out >= BH_LRU_SIZE) {
+					BUG_ON(evictee != NULL);
+					evictee = bh2;
+				} else {
+					bhs[out++] = bh2;
+				}
+			}
+		}
+		while (out < BH_LRU_SIZE)
+			bhs[out++] = NULL;
+		memcpy(lru->bhs, bhs, sizeof(bhs));
+	}
+	spin_unlock(&lru->lock);
+	put_cpu();
+
+	if (evictee) {
+		touch_buffer(evictee);
+		__brelse(evictee);
+	}
+}
+
+static inline struct buffer_head *
+lookup_bh(struct block_device *bdev, sector_t block, int size)
+{
+	struct buffer_head *ret = NULL;
+	struct bh_lru *lru;
+	int i;
+
+	lru = &bh_lrus[get_cpu()];
+	spin_lock(&lru->lock);
+	for (i = 0; i < BH_LRU_SIZE; i++) {
+		struct buffer_head *bh = lru->bhs[i];
+
+		if (bh && bh->b_bdev == bdev &&
+				bh->b_blocknr == block && bh->b_size == size) {
+			if (i) {
+				while (i) {
+					lru->bhs[i] = lru->bhs[i - 1];
+					i--;
+				}
+				lru->bhs[0] = bh;
+			}
+			get_bh(bh);
+			ret = bh;
+			break;
+		}
+	}
+	spin_unlock(&lru->lock);
+	put_cpu();
+	return ret;
+}
+
+struct buffer_head *
+__find_get_block(struct block_device *bdev, sector_t block, int size)
+{
+	struct buffer_head *bh = lookup_bh(bdev, block, size);
+
+	if (bh == NULL) {
+		bh = __find_get_block_slow(bdev, block, size);
+		bh_lru_install(bh);
+	}
+	return bh;
+}
+EXPORT_SYMBOL(__find_get_block);
+
+struct buffer_head *
+__getblk(struct block_device *bdev, sector_t block, int size)
+{
+	struct buffer_head *bh = __find_get_block(bdev, block, size);
+
+	if (bh == NULL) {
+		bh = __getblk_slow(bdev, block, size);
+		bh_lru_install(bh);
+	}
+	return bh;
+}
+EXPORT_SYMBOL(__getblk);
+
+struct buffer_head *
+__bread(struct block_device *bdev, sector_t block, int size)
+{
+	struct buffer_head *bh = __getblk(bdev, block, size);
+
+	if (bh) {
+		if (buffer_uptodate(bh))
+			return bh;
+		__brelse(bh);
+	}
+	bh = __bread_slow(bdev, block, size);
+	bh_lru_install(bh);
+	return bh;
+}
+EXPORT_SYMBOL(__bread);
+
+/*
+ * This is called rarely - at unmount.
+ */
+static void invalidate_bh_lrus(void)
+{
+	int cpu_idx;
+
+	for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++)
+		spin_lock(&bh_lrus[cpu_idx].lock);
+	for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++) {
+		int i;
+
+		for (i = 0; i < BH_LRU_SIZE; i++) {
+			brelse(bh_lrus[cpu_idx].bhs[i]);
+			bh_lrus[cpu_idx].bhs[i] = NULL;
+		}
+	}
+	for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++)
+		spin_unlock(&bh_lrus[cpu_idx].lock);
+}
+
 void set_bh_page(struct buffer_head *bh,
 		struct page *page, unsigned long offset)
 {
@@ -2435,6 +2598,9 @@ void __init buffer_init(void)
 {
 	int i;
 
+	for (i = 0; i < NR_CPUS; i++)
+		spin_lock_init(&bh_lrus[i].lock);
+
 	bh_cachep = kmem_cache_create("buffer_head",
 			sizeof(struct buffer_head), 0,
 			SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 4fc6bab55825..a94644322d86 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -164,7 +164,7 @@ struct buffer_head *__find_get_block(struct block_device *, sector_t, int);
 struct buffer_head * __getblk(struct block_device *, sector_t, int);
 void __brelse(struct buffer_head *);
 void __bforget(struct buffer_head *);
-struct buffer_head * __bread(struct block_device *, int, int);
+struct buffer_head *__bread(struct block_device *, sector_t block, int size);
 void wakeup_bdflush(void);
 struct buffer_head *alloc_buffer_head(void);
 void free_buffer_head(struct buffer_head * bh);
@@ -201,9 +201,9 @@ int generic_osync_inode(struct inode *, int);
  * inline definitions
  */
 
-static inline void get_bh(struct buffer_head * bh)
+static inline void get_bh(struct buffer_head *bh)
 {
-        atomic_inc(&(bh)->b_count);
+        atomic_inc(&bh->b_count);
 }
 
 static inline void put_bh(struct buffer_head *bh)
@@ -212,68 +212,49 @@ static inline void put_bh(struct buffer_head *bh)
         atomic_dec(&bh->b_count);
 }
 
-/*
- * If an error happens during the make_request, this function
- * has to be recalled. It marks the buffer as clean and not
- * uptodate, and it notifys the upper layer about the end
- * of the I/O.
- */
-static inline void buffer_IO_error(struct buffer_head * bh)
-{
-	clear_buffer_dirty(bh);
-
-	/*
-	 * b_end_io has to clear the BH_Uptodate bitflag in the read error
-	 * case, however buffer contents are not necessarily bad if a
-	 * write fails
-	 */
-	bh->b_end_io(bh, buffer_uptodate(bh));
-}
-
-
-static inline void brelse(struct buffer_head *buf)
+static inline void brelse(struct buffer_head *bh)
 {
-	if (buf)
-		__brelse(buf);
+	if (bh)
+		__brelse(bh);
 }
 
-static inline void bforget(struct buffer_head *buf)
+static inline void bforget(struct buffer_head *bh)
 {
-	if (buf)
-		__bforget(buf);
+	if (bh)
+		__bforget(bh);
 }
 
-static inline struct buffer_head * sb_bread(struct super_block *sb, int block)
+static inline struct buffer_head *sb_bread(struct super_block *sb, sector_t block)
 {
 	return __bread(sb->s_bdev, block, sb->s_blocksize);
 }
 
-static inline struct buffer_head * sb_getblk(struct super_block *sb, int block)
+static inline struct buffer_head *sb_getblk(struct super_block *sb, sector_t block)
 {
 	return __getblk(sb->s_bdev, block, sb->s_blocksize);
 }
 
 static inline struct buffer_head *
-sb_find_get_block(struct super_block *sb, int block)
+sb_find_get_block(struct super_block *sb, sector_t block)
 {
 	return __find_get_block(sb->s_bdev, block, sb->s_blocksize);
 }
 
 static inline void
-map_bh(struct buffer_head *bh, struct super_block *sb, int block)
+map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block)
 {
 	set_buffer_mapped(bh);
 	bh->b_bdev = sb->s_bdev;
 	bh->b_blocknr = block;
 }
 
-static inline void wait_on_buffer(struct buffer_head * bh)
+static inline void wait_on_buffer(struct buffer_head *bh)
 {
 	if (buffer_locked(bh))
 		__wait_on_buffer(bh);
 }
 
-static inline void lock_buffer(struct buffer_head * bh)
+static inline void lock_buffer(struct buffer_head *bh)
 {
 	while (test_set_buffer_locked(bh))
 		__wait_on_buffer(bh);
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index cbf06ff3725b..d2d6fe4794cc 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -196,14 +196,12 @@ EXPORT_SYMBOL(notify_change);
 EXPORT_SYMBOL(set_blocksize);
 EXPORT_SYMBOL(sb_set_blocksize);
 EXPORT_SYMBOL(sb_min_blocksize);
-EXPORT_SYMBOL(__getblk);
 EXPORT_SYMBOL(cdget);
 EXPORT_SYMBOL(cdput);
 EXPORT_SYMBOL(bdget);
 EXPORT_SYMBOL(bdput);
 EXPORT_SYMBOL(bd_claim);
 EXPORT_SYMBOL(bd_release);
-EXPORT_SYMBOL(__bread);
 EXPORT_SYMBOL(__brelse);
 EXPORT_SYMBOL(__bforget);
 EXPORT_SYMBOL(ll_rw_block);
@@ -549,7 +547,6 @@ EXPORT_SYMBOL(file_fsync);
 EXPORT_SYMBOL(fsync_buffers_list);
 EXPORT_SYMBOL(clear_inode);
 EXPORT_SYMBOL(init_special_inode);
-EXPORT_SYMBOL(__find_get_block);
 EXPORT_SYMBOL(new_inode);
 EXPORT_SYMBOL(__insert_inode_hash);
 EXPORT_SYMBOL(remove_inode_hash);
-- 
cgit v1.2.3


From 7ef751c5711d164a7f80aa92fa0dc00ddb18e166 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@zip.com.au>
Date: Thu, 4 Jul 2002 08:30:15 -0700
Subject: [PATCH] Remove ext2's buffer_head cache

Remove ext2's open-coded bitmap LRUs.  Core kernel does this for it now.
---
 fs/ext2/balloc.c           | 162 ++++++++++++++-----------------------------
 fs/ext2/ialloc.c           | 166 +++++++++++++++------------------------------
 fs/ext2/super.c            |  14 ----
 include/linux/ext2_fs_sb.h |  14 ----
 4 files changed, 107 insertions(+), 249 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 1a84181e4322..e204a55b415e 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -75,9 +75,8 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
  *
  * Return buffer_head on success or NULL in case of failure.
  */
-
-static struct buffer_head *read_block_bitmap(struct super_block *sb,
-						unsigned int block_group)
+static struct buffer_head *
+read_block_bitmap(struct super_block *sb, unsigned int block_group)
 {
 	struct ext2_group_desc * desc;
 	struct buffer_head * bh = NULL;
@@ -95,78 +94,6 @@ error_out:
 	return bh;
 }
 
-/*
- * load_block_bitmap loads the block bitmap for a blocks group
- *
- * It maintains a cache for the last bitmaps loaded.  This cache is managed
- * with a LRU algorithm.
- *
- * Notes:
- * 1/ There is one cache per mounted file system.
- * 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups,
- *    this function reads the bitmap without maintaining a LRU cache.
- * 
- * Return the buffer_head of the bitmap or ERR_PTR(-ve).
- */
-static struct buffer_head *load_block_bitmap(struct super_block * sb,
-						unsigned int block_group)
-{
-	struct ext2_sb_info *sbi = EXT2_SB(sb);
-	int i, slot = 0;
-	struct buffer_head *bh = sbi->s_block_bitmap[0];
-
-	if (block_group >= sbi->s_groups_count)
-		ext2_panic (sb, "load_block_bitmap",
-			    "block_group >= groups_count - "
-			    "block_group = %d, groups_count = %lu",
-			    block_group, sbi->s_groups_count);
-	
-	/*
-	 * Do the lookup for the slot.  First of all, check if we're asking
-	 * for the same slot as last time, and did we succeed that last time?
-	 */
-	if (sbi->s_loaded_block_bitmaps > 0 &&
-	    sbi->s_block_bitmap_number[0] == block_group && bh)
-		goto found;
-
-	if (sbi->s_groups_count <= EXT2_MAX_GROUP_LOADED) {
-		slot = block_group;
-		bh = sbi->s_block_bitmap[slot];
-		if (!bh)
-			goto read_it;
-		if (sbi->s_block_bitmap_number[slot] == slot)
-			goto found;
-		ext2_panic (sb, "load_block_bitmap",
-			    "block_group != block_bitmap_number");
-	}
-
-	bh = NULL;
-	for (i = 0; i < sbi->s_loaded_block_bitmaps &&
-		    sbi->s_block_bitmap_number[i] != block_group; i++)
-		;
-	if (i < sbi->s_loaded_block_bitmaps)
-		bh = sbi->s_block_bitmap[i];
-	else if (sbi->s_loaded_block_bitmaps < EXT2_MAX_GROUP_LOADED)
-		sbi->s_loaded_block_bitmaps++;
-	else
-		brelse (sbi->s_block_bitmap[--i]);
-
-	while (i--) {
-		sbi->s_block_bitmap_number[i+1] = sbi->s_block_bitmap_number[i];
-		sbi->s_block_bitmap[i+1] = sbi->s_block_bitmap[i];
-	}
-
-read_it:
-	if (!bh)
-		bh = read_block_bitmap(sb, block_group);
-	sbi->s_block_bitmap_number[slot] = block_group;
-	sbi->s_block_bitmap[slot] = bh;
-	if (!bh)
-		return ERR_PTR(-EIO);
-found:
-	return bh;
-}
-
 static inline int reserve_blocks(struct super_block *sb, int count)
 {
 	struct ext2_sb_info * sbi = EXT2_SB(sb);
@@ -238,7 +165,7 @@ static inline void group_release_blocks(struct ext2_group_desc *desc,
 void ext2_free_blocks (struct inode * inode, unsigned long block,
 		       unsigned long count)
 {
-	struct buffer_head * bh;
+	struct buffer_head *bitmap_bh = NULL;
 	struct buffer_head * bh2;
 	unsigned long block_group;
 	unsigned long bit;
@@ -275,8 +202,9 @@ do_more:
 		overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
 		count -= overflow;
 	}
-	bh = load_block_bitmap (sb, block_group);
-	if (IS_ERR(bh))
+	brelse(bitmap_bh);
+	bitmap_bh = read_block_bitmap(sb, block_group);
+	if (!bitmap_bh)
 		goto error_return;
 
 	desc = ext2_get_group_desc (sb, block_group, &bh2);
@@ -295,7 +223,7 @@ do_more:
 			    block, count);
 
 	for (i = 0, group_freed = 0; i < count; i++) {
-		if (!ext2_clear_bit (bit + i, bh->b_data))
+		if (!ext2_clear_bit(bit + i, bitmap_bh->b_data))
 			ext2_error (sb, "ext2_free_blocks",
 				      "bit already cleared for block %lu",
 				      block + i);
@@ -303,10 +231,10 @@ do_more:
 			group_freed++;
 	}
 
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty(bitmap_bh);
 	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ll_rw_block (WRITE, 1, &bh);
-		wait_on_buffer (bh);
+		ll_rw_block(WRITE, 1, &bitmap_bh);
+		wait_on_buffer(bitmap_bh);
 	}
 
 	group_release_blocks(desc, bh2, group_freed);
@@ -318,6 +246,7 @@ do_more:
 		goto do_more;
 	}
 error_return:
+	brelse(bitmap_bh);
 	release_blocks(sb, freed);
 	unlock_super (sb);
 	DQUOT_FREE_BLOCK(inode, freed);
@@ -384,7 +313,7 @@ got_it:
 int ext2_new_block (struct inode * inode, unsigned long goal,
     u32 * prealloc_count, u32 * prealloc_block, int * err)
 {
-	struct buffer_head *bh;
+	struct buffer_head *bitmap_bh = NULL;
 	struct buffer_head *bh2;
 	struct ext2_group_desc *desc;
 	int i, j, k, tmp;
@@ -431,13 +360,14 @@ int ext2_new_block (struct inode * inode, unsigned long goal,
 	group_alloc = group_reserve_blocks(desc, bh2, es_alloc);
 	if (group_alloc) {
 		j = ((goal - le32_to_cpu(es->s_first_data_block)) % group_size);
-		bh = load_block_bitmap (sb, i);
-		if (IS_ERR(bh))
+		brelse(bitmap_bh);
+		bitmap_bh = read_block_bitmap(sb, i);
+		if (!bitmap_bh)
 			goto io_error;
 		
 		ext2_debug ("goal is at %d:%d.\n", i, j);
 
-		j = grab_block(bh->b_data, group_size, j);
+		j = grab_block(bitmap_bh->b_data, group_size, j);
 		if (j >= 0)
 			goto got_block;
 		group_release_blocks(desc, bh2, group_alloc);
@@ -461,11 +391,12 @@ int ext2_new_block (struct inode * inode, unsigned long goal,
 	}
 	if (k >= sbi->s_groups_count)
 		goto out_release;
-	bh = load_block_bitmap (sb, i);
-	if (IS_ERR(bh))
+	brelse(bitmap_bh);
+	bitmap_bh = read_block_bitmap(sb, i);
+	if (!bitmap_bh)
 		goto io_error;
 	
-	j = grab_block(bh->b_data, group_size, 0);
+	j = grab_block(bitmap_bh->b_data, group_size, 0);
 	if (j < 0) {
 		ext2_error (sb, "ext2_new_block",
 			    "Free blocks count corrupted for block group %d", i);
@@ -510,7 +441,7 @@ got_block:
 		unsigned n;
 
 		for (n = 0; n < group_alloc && ++j < group_size; n++) {
-			if (ext2_set_bit (j, bh->b_data))
+			if (ext2_set_bit(j, bitmap_bh->b_data))
  				break;
 		}
 		*prealloc_block = block + 1;
@@ -521,10 +452,10 @@ got_block:
 	}
 	write_unlock(&EXT2_I(inode)->i_meta_lock);
 
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty(bitmap_bh);
 	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ll_rw_block (WRITE, 1, &bh);
-		wait_on_buffer (bh);
+		ll_rw_block(WRITE, 1, &bitmap_bh);
+		wait_on_buffer(bitmap_bh);
 	}
 
 	ext2_debug ("allocating block %d. ", block);
@@ -537,6 +468,7 @@ out_unlock:
 	unlock_super (sb);
 	DQUOT_FREE_BLOCK(inode, dq_alloc);
 out:
+	brelse(bitmap_bh);
 	return block;
 
 io_error:
@@ -558,19 +490,20 @@ unsigned long ext2_count_free_blocks (struct super_block * sb)
 	bitmap_count = 0;
 	desc = NULL;
 	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
-		struct buffer_head *bh;
+		struct buffer_head *bitmap_bh;
 		desc = ext2_get_group_desc (sb, i, NULL);
 		if (!desc)
 			continue;
 		desc_count += le16_to_cpu(desc->bg_free_blocks_count);
-		bh = load_block_bitmap (sb, i);
-		if (IS_ERR(bh))
+		bitmap_bh = read_block_bitmap(sb, i);
+		if (!bitmap_bh)
 			continue;
 		
-		x = ext2_count_free (bh, sb->s_blocksize);
+		x = ext2_count_free(bitmap_bh, sb->s_blocksize);
 		printk ("group %d: stored = %d, counted = %lu\n",
 			i, le16_to_cpu(desc->bg_free_blocks_count), x);
 		bitmap_count += x;
+		brelse(bitmap_bh);
 	}
 	printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
 	       le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count);
@@ -645,7 +578,7 @@ unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
 /* Called at mount-time, super-block is locked */
 void ext2_check_blocks_bitmap (struct super_block * sb)
 {
-	struct buffer_head * bh;
+	struct buffer_head *bitmap_bh = NULL;
 	struct ext2_super_block * es;
 	unsigned long desc_count, bitmap_count, x, j;
 	unsigned long desc_blocks;
@@ -661,38 +594,43 @@ void ext2_check_blocks_bitmap (struct super_block * sb)
 		if (!desc)
 			continue;
 		desc_count += le16_to_cpu(desc->bg_free_blocks_count);
-		bh = load_block_bitmap (sb, i);
-		if (IS_ERR(bh))
+		brelse(bitmap_bh);
+		bitmap_bh = read_block_bitmap(sb, i);
+		if (!bitmap_bh)
 			continue;
 
-		if (ext2_bg_has_super(sb, i) && !ext2_test_bit(0, bh->b_data))
+		if (ext2_bg_has_super(sb, i) &&
+				!ext2_test_bit(0, bitmap_bh->b_data))
 			ext2_error(sb, __FUNCTION__,
 				   "Superblock in group %d is marked free", i);
 
 		desc_blocks = ext2_bg_num_gdb(sb, i);
 		for (j = 0; j < desc_blocks; j++)
-			if (!ext2_test_bit(j + 1, bh->b_data))
+			if (!ext2_test_bit(j + 1, bitmap_bh->b_data))
 				ext2_error(sb, __FUNCTION__,
 					   "Descriptor block #%ld in group "
 					   "%d is marked free", j, i);
 
-		if (!block_in_use (le32_to_cpu(desc->bg_block_bitmap), sb, bh->b_data))
-			ext2_error (sb, "ext2_check_blocks_bitmap",
+		if (!block_in_use(le32_to_cpu(desc->bg_block_bitmap),
+					sb, bitmap_bh->b_data))
+			ext2_error(sb, "ext2_check_blocks_bitmap",
 				    "Block bitmap for group %d is marked free",
 				    i);
 
-		if (!block_in_use (le32_to_cpu(desc->bg_inode_bitmap), sb, bh->b_data))
-			ext2_error (sb, "ext2_check_blocks_bitmap",
+		if (!block_in_use(le32_to_cpu(desc->bg_inode_bitmap),
+					sb, bitmap_bh->b_data))
+			ext2_error(sb, "ext2_check_blocks_bitmap",
 				    "Inode bitmap for group %d is marked free",
 				    i);
 
 		for (j = 0; j < EXT2_SB(sb)->s_itb_per_group; j++)
-			if (!block_in_use (le32_to_cpu(desc->bg_inode_table) + j, sb, bh->b_data))
+			if (!block_in_use(le32_to_cpu(desc->bg_inode_table) + j,
+						sb, bitmap_bh->b_data))
 				ext2_error (sb, "ext2_check_blocks_bitmap",
 					    "Block #%ld of the inode table in "
 					    "group %d is marked free", j, i);
 
-		x = ext2_count_free (bh, sb->s_blocksize);
+		x = ext2_count_free(bitmap_bh, sb->s_blocksize);
 		if (le16_to_cpu(desc->bg_free_blocks_count) != x)
 			ext2_error (sb, "ext2_check_blocks_bitmap",
 				    "Wrong free blocks count for group %d, "
@@ -702,8 +640,10 @@ void ext2_check_blocks_bitmap (struct super_block * sb)
 	}
 	if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
 		ext2_error (sb, "ext2_check_blocks_bitmap",
-			    "Wrong free blocks count in super block, "
-			    "stored = %lu, counted = %lu",
-			    (unsigned long) le32_to_cpu(es->s_free_blocks_count), bitmap_count);
+			"Wrong free blocks count in super block, "
+			"stored = %lu, counted = %lu",
+			(unsigned long)le32_to_cpu(es->s_free_blocks_count),
+			bitmap_count);
+	brelse(bitmap_bh);
 }
 #endif
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 26f53a854a9b..54085ac1ff02 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -30,8 +30,7 @@
  *
  * The file system contains group descriptors which are located after the
  * super block.  Each descriptor contains the number of the bitmap block and
- * the free blocks count in the block.  The descriptors are loaded in memory
- * when a file system is mounted (see ext2_read_super).
+ * the free blocks count in the block.
  */
 
 
@@ -41,8 +40,8 @@
  *
  * Return buffer_head of bitmap on success or NULL.
  */
-static struct buffer_head *read_inode_bitmap (struct super_block * sb,
-					       unsigned long block_group)
+static struct buffer_head *
+read_inode_bitmap(struct super_block * sb, unsigned long block_group)
 {
 	struct ext2_group_desc *desc;
 	struct buffer_head *bh = NULL;
@@ -53,7 +52,7 @@ static struct buffer_head *read_inode_bitmap (struct super_block * sb,
 
 	bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
 	if (!bh)
-		ext2_error (sb, "read_inode_bitmap",
+		ext2_error(sb, "read_inode_bitmap",
 			    "Cannot read inode bitmap - "
 			    "block_group = %lu, inode_bitmap = %lu",
 			    block_group, (unsigned long) desc->bg_inode_bitmap);
@@ -61,75 +60,6 @@ error_out:
 	return bh;
 }
 
-/*
- * load_inode_bitmap loads the inode bitmap for a blocks group
- *
- * It maintains a cache for the last bitmaps loaded.  This cache is managed
- * with a LRU algorithm.
- *
- * Notes:
- * 1/ There is one cache per mounted file system.
- * 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups,
- *    this function reads the bitmap without maintaining a LRU cache.
- * 
- * Return the buffer_head of the bitmap or the ERR_PTR(error)
- */
-static struct buffer_head *load_inode_bitmap (struct super_block * sb,
-					      unsigned int block_group)
-{
-	int i, slot = 0;
-	struct ext2_sb_info *sbi = EXT2_SB(sb);
-	struct buffer_head *bh = sbi->s_inode_bitmap[0];
-
-	if (block_group >= sbi->s_groups_count)
-		ext2_panic (sb, "load_inode_bitmap",
-			    "block_group >= groups_count - "
-			    "block_group = %d, groups_count = %lu",
-			     block_group, sbi->s_groups_count);
-
-	if (sbi->s_loaded_inode_bitmaps > 0 &&
-	    sbi->s_inode_bitmap_number[0] == block_group && bh)
-		goto found;
-
-	if (sbi->s_groups_count <= EXT2_MAX_GROUP_LOADED) {
-		slot = block_group;
-		bh = sbi->s_inode_bitmap[slot];
-		if (!bh)
-			goto read_it;
-		if (sbi->s_inode_bitmap_number[slot] == slot)
-			goto found;
-		ext2_panic (sb, "load_inode_bitmap",
-			    "block_group != inode_bitmap_number");
-	}
-
-	bh = NULL;
-	for (i = 0; i < sbi->s_loaded_inode_bitmaps &&
-		    sbi->s_inode_bitmap_number[i] != block_group;
-	     i++)
-		;
-	if (i < sbi->s_loaded_inode_bitmaps)
-		bh = sbi->s_inode_bitmap[i];
-	else if (sbi->s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED)
-		sbi->s_loaded_inode_bitmaps++;
-	else
-		brelse (sbi->s_inode_bitmap[--i]);
-
-	while (i--) {
-		sbi->s_inode_bitmap_number[i+1] = sbi->s_inode_bitmap_number[i];
-		sbi->s_inode_bitmap[i+1] = sbi->s_inode_bitmap[i];
-	}
-
-read_it:
-	if (!bh)
-		bh = read_inode_bitmap (sb, block_group);
-	sbi->s_inode_bitmap_number[slot] = block_group;
-	sbi->s_inode_bitmap[slot] = bh;
-	if (!bh)
-		return ERR_PTR(-EIO);
-found:
-	return bh;
-}
-
 /*
  * NOTE! When we get the inode, we're the only people
  * that have access to it, and as such there are no
@@ -151,8 +81,8 @@ void ext2_free_inode (struct inode * inode)
 	struct super_block * sb = inode->i_sb;
 	int is_directory;
 	unsigned long ino;
-	struct buffer_head * bh;
-	struct buffer_head * bh2;
+	struct buffer_head *bitmap_bh = NULL;
+	struct buffer_head *bh2;
 	unsigned long block_group;
 	unsigned long bit;
 	struct ext2_group_desc * desc;
@@ -186,12 +116,13 @@ void ext2_free_inode (struct inode * inode)
 	}
 	block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
 	bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb);
-	bh = load_inode_bitmap (sb, block_group);
-	if (IS_ERR(bh))
+	brelse(bitmap_bh);
+	bitmap_bh = read_inode_bitmap(sb, block_group);
+	if (!bitmap_bh)
 		goto error_return;
 
 	/* Ok, now we can actually update the inode bitmaps.. */
-	if (!ext2_clear_bit (bit, bh->b_data))
+	if (!ext2_clear_bit(bit, bitmap_bh->b_data))
 		ext2_error (sb, "ext2_free_inode",
 			      "bit already cleared for inode %lu", ino);
 	else {
@@ -208,13 +139,14 @@ void ext2_free_inode (struct inode * inode)
 			cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
 		mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
 	}
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty(bitmap_bh);
 	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ll_rw_block (WRITE, 1, &bh);
-		wait_on_buffer (bh);
+		ll_rw_block(WRITE, 1, &bitmap_bh);
+		wait_on_buffer(bitmap_bh);
 	}
 	sb->s_dirt = 1;
 error_return:
+	brelse(bitmap_bh);
 	unlock_super (sb);
 }
 
@@ -351,9 +283,9 @@ found:
 
 struct inode * ext2_new_inode(struct inode * dir, int mode)
 {
-	struct super_block * sb;
-	struct buffer_head * bh;
-	struct buffer_head * bh2;
+	struct super_block *sb;
+	struct buffer_head *bitmap_bh = NULL;
+	struct buffer_head *bh2;
 	int group, i;
 	ino_t ino;
 	struct inode * inode;
@@ -361,6 +293,7 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
 	struct ext2_super_block * es;
 	struct ext2_inode_info *ei;
 	int err;
+	struct inode *ret;
 
 	sb = dir->i_sb;
 	inode = new_inode(sb);
@@ -381,20 +314,21 @@ repeat:
 		goto fail;
 
 	err = -EIO;
-	bh = load_inode_bitmap (sb, group);
-	if (IS_ERR(bh))
+	brelse(bitmap_bh);
+	bitmap_bh = read_inode_bitmap(sb, group);
+	if (!bitmap_bh)
 		goto fail2;
 
-	i = ext2_find_first_zero_bit ((unsigned long *) bh->b_data,
+	i = ext2_find_first_zero_bit((unsigned long *)bitmap_bh->b_data,
 				      EXT2_INODES_PER_GROUP(sb));
 	if (i >= EXT2_INODES_PER_GROUP(sb))
 		goto bad_count;
-	ext2_set_bit (i, bh->b_data);
+	ext2_set_bit(i, bitmap_bh->b_data);
 
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty(bitmap_bh);
 	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ll_rw_block (WRITE, 1, &bh);
-		wait_on_buffer (bh);
+		ll_rw_block(WRITE, 1, &bitmap_bh);
+		wait_on_buffer(bitmap_bh);
 	}
 
 	ino = group * EXT2_INODES_PER_GROUP(sb) + i + 1;
@@ -452,17 +386,19 @@ repeat:
 	insert_inode_hash(inode);
 	mark_inode_dirty(inode);
 
-	unlock_super (sb);
+	unlock_super(sb);
+	ret = inode;
 	if(DQUOT_ALLOC_INODE(inode)) {
 		DQUOT_DROP(inode);
 		inode->i_flags |= S_NOQUOTA;
 		inode->i_nlink = 0;
 		iput(inode);
-		return ERR_PTR(-EDQUOT);
+		ret = ERR_PTR(-EDQUOT);
+	} else {
+		ext2_debug("allocating inode %lu\n", inode->i_ino);
+		ext2_preread_inode(inode);
 	}
-	ext2_debug ("allocating inode %lu\n", inode->i_ino);
-	ext2_preread_inode(inode);
-	return inode;
+	goto out;
 
 fail2:
 	desc = ext2_get_group_desc (sb, group, &bh2);
@@ -476,7 +412,8 @@ fail:
 	unlock_super(sb);
 	make_bad_inode(inode);
 	iput(inode);
-	return ERR_PTR(err);
+	ret = ERR_PTR(err);
+	goto out;
 
 bad_count:
 	ext2_error (sb, "ext2_new_inode",
@@ -491,6 +428,9 @@ bad_count:
 	desc->bg_free_inodes_count = 0;
 	mark_buffer_dirty(bh2);
 	goto repeat;
+out:
+	brelse(bitmap_bh);
+	return ret;
 }
 
 unsigned long ext2_count_free_inodes (struct super_block * sb)
@@ -498,30 +438,33 @@ unsigned long ext2_count_free_inodes (struct super_block * sb)
 #ifdef EXT2FS_DEBUG
 	struct ext2_super_block * es;
 	unsigned long desc_count = 0, bitmap_count = 0;
+	struct buffer_head *bitmap_bh = NULL;
 	int i;
 
 	lock_super (sb);
 	es = EXT2_SB(sb)->s_es;
 	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
-		struct ext2_group_desc *desc = ext2_get_group_desc (sb, i, NULL);
-		struct buffer_head *bh;
+		struct ext2_group_desc *desc;
 		unsigned x;
 
+		desc = ext2_get_group_desc (sb, i, NULL);
 		if (!desc)
 			continue;
 		desc_count += le16_to_cpu(desc->bg_free_inodes_count);
-		bh = load_inode_bitmap (sb, i);
-		if (IS_ERR(bh))
+		brelse(bitmap_bh);
+		bitmap_bh = read_inode_bitmap(sb, i);
+		if (!bitmap_bh)
 			continue;
 
-		x = ext2_count_free (bh, EXT2_INODES_PER_GROUP(sb) / 8);
+		x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8);
 		printk ("group %d: stored = %d, counted = %lu\n",
 			i, le16_to_cpu(desc->bg_free_inodes_count), x);
 		bitmap_count += x;
 	}
+	brelse(bitmap_bh);
 	printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n",
 		le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
-	unlock_super (sb);
+	unlock_super(sb);
 	return desc_count;
 #else
 	return le32_to_cpu(EXT2_SB(sb)->s_es->s_free_inodes_count);
@@ -534,21 +477,23 @@ void ext2_check_inodes_bitmap (struct super_block * sb)
 {
 	struct ext2_super_block * es = EXT2_SB(sb)->s_es;
 	unsigned long desc_count = 0, bitmap_count = 0;
+	struct buffer_head *bitmap_bh = NULL;
 	int i;
 
 	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
-		struct ext2_group_desc *desc = ext2_get_group_desc(sb, i, NULL);
-		struct buffer_head *bh;
+		struct ext2_group_desc *desc;
 		unsigned x;
 
+		desc = ext2_get_group_desc(sb, i, NULL);
 		if (!desc)
 			continue;
 		desc_count += le16_to_cpu(desc->bg_free_inodes_count);
-		bh = load_inode_bitmap (sb, i);
-		if (IS_ERR(bh))
+		brelse(bitmap_bh);
+		bitmap_bh = read_inode_bitmap(sb, i);
+		if (!bitmap_bh)
 			continue;
 		
-		x = ext2_count_free (bh, EXT2_INODES_PER_GROUP(sb) / 8);
+		x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8);
 		if (le16_to_cpu(desc->bg_free_inodes_count) != x)
 			ext2_error (sb, "ext2_check_inodes_bitmap",
 				    "Wrong free inodes count in group %d, "
@@ -556,8 +501,9 @@ void ext2_check_inodes_bitmap (struct super_block * sb)
 				    le16_to_cpu(desc->bg_free_inodes_count), x);
 		bitmap_count += x;
 	}
+	brelse(bitmap_bh);
 	if (le32_to_cpu(es->s_free_inodes_count) != bitmap_count)
-		ext2_error (sb, "ext2_check_inodes_bitmap",
+		ext2_error(sb, "ext2_check_inodes_bitmap",
 			    "Wrong free inodes count in super block, "
 			    "stored = %lu, counted = %lu",
 			    (unsigned long)le32_to_cpu(es->s_free_inodes_count),
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index fde0fdd39d83..d07d36b9f396 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -142,12 +142,6 @@ static void ext2_put_super (struct super_block * sb)
 		if (sbi->s_group_desc[i])
 			brelse (sbi->s_group_desc[i]);
 	kfree(sbi->s_group_desc);
-	for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
-		if (sbi->s_inode_bitmap[i])
-			brelse (sbi->s_inode_bitmap[i]);
-	for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
-		if (sbi->s_block_bitmap[i])
-			brelse (sbi->s_block_bitmap[i]);
 	brelse (sbi->s_sbh);
 	sb->u.generic_sbp = NULL;
 	kfree(sbi);
@@ -686,14 +680,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 		db_count = i;
 		goto failed_mount2;
 	}
-	for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) {
-		sbi->s_inode_bitmap_number[i] = 0;
-		sbi->s_inode_bitmap[i] = NULL;
-		sbi->s_block_bitmap_number[i] = 0;
-		sbi->s_block_bitmap[i] = NULL;
-	}
-	sbi->s_loaded_inode_bitmaps = 0;
-	sbi->s_loaded_block_bitmaps = 0;
 	sbi->s_gdb_count = db_count;
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	/*
diff --git a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h
index 47f53158e755..070f5b630581 100644
--- a/include/linux/ext2_fs_sb.h
+++ b/include/linux/ext2_fs_sb.h
@@ -16,14 +16,6 @@
 #ifndef _LINUX_EXT2_FS_SB
 #define _LINUX_EXT2_FS_SB
 
-/*
- * The following is not needed anymore since the descriptors buffer
- * heads are now dynamically allocated
- */
-/* #define EXT2_MAX_GROUP_DESC	8 */
-
-#define EXT2_MAX_GROUP_LOADED	8
-
 /*
  * second extended-fs super-block data in memory
  */
@@ -41,12 +33,6 @@ struct ext2_sb_info {
 	struct buffer_head * s_sbh;	/* Buffer containing the super block */
 	struct ext2_super_block * s_es;	/* Pointer to the super block in the buffer */
 	struct buffer_head ** s_group_desc;
-	unsigned short s_loaded_inode_bitmaps;
-	unsigned short s_loaded_block_bitmaps;
-	unsigned long s_inode_bitmap_number[EXT2_MAX_GROUP_LOADED];
-	struct buffer_head * s_inode_bitmap[EXT2_MAX_GROUP_LOADED];
-	unsigned long s_block_bitmap_number[EXT2_MAX_GROUP_LOADED];
-	struct buffer_head * s_block_bitmap[EXT2_MAX_GROUP_LOADED];
 	unsigned long  s_mount_opt;
 	uid_t s_resuid;
 	gid_t s_resgid;
-- 
cgit v1.2.3


From 34cb9226ec0b10bf152beffdfb669595c7d40490 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@zip.com.au>
Date: Thu, 4 Jul 2002 08:30:20 -0700
Subject: [PATCH] Remove ext3's buffer_head cache

Removes ext3's open-coded inode and allocation bitmap LRUs.

This patch includes a cleanup to ext3_new_block().  The local variables
`bh', `bh2', `i', `j', `k' and `tmp' have been renamed to something
more palatable.
---
 fs/ext3/balloc.c           | 475 +++++++++++++--------------------------------
 fs/ext3/ialloc.c           | 243 ++++++++---------------
 fs/ext3/super.c            |  12 --
 include/linux/ext3_fs_sb.h |  14 --
 4 files changed, 216 insertions(+), 528 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index c5cc2178ad4a..f733c4b50686 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -76,192 +76,36 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
  * Read the bitmap for a given block_group, reading into the specified 
  * slot in the superblock's bitmap cache.
  *
- * Return >=0 on success or a -ve error code.
+ * Return buffer_head on success or NULL in case of failure.
  */
-
-static int read_block_bitmap (struct super_block * sb,
-			       unsigned int block_group,
-			       unsigned long bitmap_nr)
+static struct buffer_head *
+read_block_bitmap(struct super_block *sb, unsigned int block_group)
 {
-	struct ext3_group_desc * gdp;
+	struct ext3_group_desc * desc;
 	struct buffer_head * bh = NULL;
-	int retval = -EIO;
 	
-	gdp = ext3_get_group_desc (sb, block_group, NULL);
-	if (!gdp)
+	desc = ext3_get_group_desc (sb, block_group, NULL);
+	if (!desc)
 		goto error_out;
-	retval = 0;
-	bh = sb_bread(sb, le32_to_cpu(gdp->bg_block_bitmap));
-	if (!bh) {
+	bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
+	if (!bh)
 		ext3_error (sb, "read_block_bitmap",
 			    "Cannot read block bitmap - "
 			    "block_group = %d, block_bitmap = %lu",
-			    block_group, (unsigned long) gdp->bg_block_bitmap);
-		retval = -EIO;
-	}
-	/*
-	 * On IO error, just leave a zero in the superblock's block pointer for
-	 * this group.  The IO will be retried next time.
-	 */
+			    block_group, (unsigned long) desc->bg_block_bitmap);
 error_out:
-	sb->u.ext3_sb.s_block_bitmap_number[bitmap_nr] = block_group;
-	sb->u.ext3_sb.s_block_bitmap[bitmap_nr] = bh;
-	return retval;
-}
-
-/*
- * load_block_bitmap loads the block bitmap for a blocks group
- *
- * It maintains a cache for the last bitmaps loaded.  This cache is managed
- * with a LRU algorithm.
- *
- * Notes:
- * 1/ There is one cache per mounted file system.
- * 2/ If the file system contains less than EXT3_MAX_GROUP_LOADED groups,
- *    this function reads the bitmap without maintaining a LRU cache.
- * 
- * Return the slot used to store the bitmap, or a -ve error code.
- */
-static int __load_block_bitmap (struct super_block * sb,
-			        unsigned int block_group)
-{
-	int i, j, retval = 0;
-	unsigned long block_bitmap_number;
-	struct buffer_head * block_bitmap;
-
-	if (block_group >= sb->u.ext3_sb.s_groups_count)
-		ext3_panic (sb, "load_block_bitmap",
-			    "block_group >= groups_count - "
-			    "block_group = %d, groups_count = %lu",
-			    block_group, sb->u.ext3_sb.s_groups_count);
-
-	if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED) {
-		if (sb->u.ext3_sb.s_block_bitmap[block_group]) {
-			if (sb->u.ext3_sb.s_block_bitmap_number[block_group] ==
-			    block_group)
-				return block_group;
-			ext3_error (sb, "__load_block_bitmap",
-				    "block_group != block_bitmap_number");
-		}
-		retval = read_block_bitmap (sb, block_group, block_group);
-		if (retval < 0)
-			return retval;
-		return block_group;
-	}
-
-	for (i = 0; i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
-		    sb->u.ext3_sb.s_block_bitmap_number[i] != block_group; i++)
-		;
-	if (i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
-  	    sb->u.ext3_sb.s_block_bitmap_number[i] == block_group) {
-		block_bitmap_number = sb->u.ext3_sb.s_block_bitmap_number[i];
-		block_bitmap = sb->u.ext3_sb.s_block_bitmap[i];
-		for (j = i; j > 0; j--) {
-			sb->u.ext3_sb.s_block_bitmap_number[j] =
-				sb->u.ext3_sb.s_block_bitmap_number[j - 1];
-			sb->u.ext3_sb.s_block_bitmap[j] =
-				sb->u.ext3_sb.s_block_bitmap[j - 1];
-		}
-		sb->u.ext3_sb.s_block_bitmap_number[0] = block_bitmap_number;
-		sb->u.ext3_sb.s_block_bitmap[0] = block_bitmap;
-
-		/*
-		 * There's still one special case here --- if block_bitmap == 0
-		 * then our last attempt to read the bitmap failed and we have
-		 * just ended up caching that failure.  Try again to read it.
-		 */
-		if (!block_bitmap)
-			retval = read_block_bitmap (sb, block_group, 0);
-	} else {
-		if (sb->u.ext3_sb.s_loaded_block_bitmaps<EXT3_MAX_GROUP_LOADED)
-			sb->u.ext3_sb.s_loaded_block_bitmaps++;
-		else
-			brelse (sb->u.ext3_sb.s_block_bitmap
-					[EXT3_MAX_GROUP_LOADED - 1]);
-		for (j = sb->u.ext3_sb.s_loaded_block_bitmaps - 1;
-					j > 0;  j--) {
-			sb->u.ext3_sb.s_block_bitmap_number[j] =
-				sb->u.ext3_sb.s_block_bitmap_number[j - 1];
-			sb->u.ext3_sb.s_block_bitmap[j] =
-				sb->u.ext3_sb.s_block_bitmap[j - 1];
-		}
-		retval = read_block_bitmap (sb, block_group, 0);
-	}
-	return retval;
-}
-
-/*
- * Load the block bitmap for a given block group.  First of all do a couple
- * of fast lookups for common cases and then pass the request onto the guts
- * of the bitmap loader.
- *
- * Return the slot number of the group in the superblock bitmap cache's on
- * success, or a -ve error code.
- *
- * There is still one inconsistency here --- if the number of groups in this
- * filesystems is <= EXT3_MAX_GROUP_LOADED, then we have no way of 
- * differentiating between a group for which we have never performed a bitmap
- * IO request, and a group for which the last bitmap read request failed.
- */
-static inline int load_block_bitmap (struct super_block * sb,
-				     unsigned int block_group)
-{
-	int slot;
-	
-	/*
-	 * Do the lookup for the slot.  First of all, check if we're asking
-	 * for the same slot as last time, and did we succeed that last time?
-	 */
-	if (sb->u.ext3_sb.s_loaded_block_bitmaps > 0 &&
-	    sb->u.ext3_sb.s_block_bitmap_number[0] == block_group &&
-	    sb->u.ext3_sb.s_block_bitmap[0]) {
-		return 0;
-	}
-	/*
-	 * Or can we do a fast lookup based on a loaded group on a filesystem
-	 * small enough to be mapped directly into the superblock?
-	 */
-	else if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED && 
-		 sb->u.ext3_sb.s_block_bitmap_number[block_group]==block_group
-			&& sb->u.ext3_sb.s_block_bitmap[block_group]) {
-		slot = block_group;
-	}
-	/*
-	 * If not, then do a full lookup for this block group.
-	 */
-	else {
-		slot = __load_block_bitmap (sb, block_group);
-	}
-
-	/*
-	 * <0 means we just got an error
-	 */
-	if (slot < 0)
-		return slot;
-	
-	/*
-	 * If it's a valid slot, we may still have cached a previous IO error,
-	 * in which case the bh in the superblock cache will be zero.
-	 */
-	if (!sb->u.ext3_sb.s_block_bitmap[slot])
-		return -EIO;
-	
-	/*
-	 * Must have been read in OK to get this far.
-	 */
-	return slot;
+	return bh;
 }
 
 /* Free given blocks, update quota and i_blocks field */
 void ext3_free_blocks (handle_t *handle, struct inode * inode,
 			unsigned long block, unsigned long count)
 {
-	struct buffer_head *bitmap_bh;
+	struct buffer_head *bitmap_bh = NULL;
 	struct buffer_head *gd_bh;
 	unsigned long block_group;
 	unsigned long bit;
 	unsigned long i;
-	int bitmap_nr;
 	unsigned long overflow;
 	struct super_block * sb;
 	struct ext3_group_desc * gdp;
@@ -300,11 +144,10 @@ do_more:
 		overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
 		count -= overflow;
 	}
-	bitmap_nr = load_block_bitmap (sb, block_group);
-	if (bitmap_nr < 0)
+	brelse(bitmap_bh);
+	bitmap_bh = read_block_bitmap(sb, block_group);
+	if (!bitmap_bh)
 		goto error_return;
-	
-	bitmap_bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
 	gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
 	if (!gdp)
 		goto error_return;
@@ -421,6 +264,7 @@ do_more:
 	}
 	sb->s_dirt = 1;
 error_return:
+	brelse(bitmap_bh);
 	ext3_std_error(sb, err);
 	unlock_super(sb);
 	if (dquot_freed_blocks)
@@ -530,29 +374,28 @@ static int find_next_usable_block(int start,
  * bitmap, and then for any free bit if that fails.
  * This function also updates quota and i_blocks field.
  */
-int ext3_new_block (handle_t *handle, struct inode * inode,
-		unsigned long goal, u32 * prealloc_count,
-		u32 * prealloc_block, int * errp)
+int
+ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
+		u32 *prealloc_count, u32 *prealloc_block, int *errp)
 {
-	struct buffer_head * bh, *bhtmp;
-	struct buffer_head * bh2;
-#if 0
-	char * p, * r;
-#endif
-	int i, j, k, tmp, alloctmp;
-	int bitmap_nr;
+	struct buffer_head *bitmap_bh = NULL;	/* bh */
+	struct buffer_head *gdp_bh;		/* bh2 */
+	int group_no;				/* i */
+	int ret_block;				/* j */
+	int bit;				/* k */
+	int target_block;			/* tmp */
 	int fatal = 0, err;
 	int performed_allocation = 0;
-	struct super_block * sb;
-	struct ext3_group_desc * gdp;
-	struct ext3_super_block * es;
+	struct super_block *sb;
+	struct ext3_group_desc *gdp;
+	struct ext3_super_block *es;
 #ifdef EXT3FS_DEBUG
 	static int goal_hits = 0, goal_attempts = 0;
 #endif
 	*errp = -ENOSPC;
 	sb = inode->i_sb;
 	if (!sb) {
-		printk ("ext3_new_block: nonexistent device");
+		printk("ext3_new_block: nonexistent device");
 		return 0;
 	}
 
@@ -564,17 +407,17 @@ int ext3_new_block (handle_t *handle, struct inode * inode,
 		return 0;
 	}
 
-	lock_super (sb);
+	lock_super(sb);
 	es = sb->u.ext3_sb.s_es;
 	if (le32_to_cpu(es->s_free_blocks_count) <=
 			le32_to_cpu(es->s_r_blocks_count) &&
 	    ((sb->u.ext3_sb.s_resuid != current->fsuid) &&
 	     (sb->u.ext3_sb.s_resgid == 0 ||
-	      !in_group_p (sb->u.ext3_sb.s_resgid)) && 
+	      !in_group_p(sb->u.ext3_sb.s_resgid)) && 
 	     !capable(CAP_SYS_RESOURCE)))
 		goto out;
 
-	ext3_debug ("goal=%lu.\n", goal);
+	ext3_debug("goal=%lu.\n", goal);
 
 	/*
 	 * First, test whether the goal block is free.
@@ -582,64 +425,62 @@ int ext3_new_block (handle_t *handle, struct inode * inode,
 	if (goal < le32_to_cpu(es->s_first_data_block) ||
 	    goal >= le32_to_cpu(es->s_blocks_count))
 		goal = le32_to_cpu(es->s_first_data_block);
-	i = (goal - le32_to_cpu(es->s_first_data_block)) /
+	group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
 			EXT3_BLOCKS_PER_GROUP(sb);
-	gdp = ext3_get_group_desc (sb, i, &bh2);
+	gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
 	if (!gdp)
 		goto io_error;
 
 	if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
-		j = ((goal - le32_to_cpu(es->s_first_data_block)) %
+		ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
 				EXT3_BLOCKS_PER_GROUP(sb));
 #ifdef EXT3FS_DEBUG
-		if (j)
+		if (ret_block)
 			goal_attempts++;
 #endif
-		bitmap_nr = load_block_bitmap (sb, i);
-		if (bitmap_nr < 0)
+		bitmap_bh = read_block_bitmap(sb, group_no);
+		if (!bitmap_bh)
 			goto io_error;
-		
-		bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
 
-		ext3_debug ("goal is at %d:%d.\n", i, j);
+		ext3_debug("goal is at %d:%d.\n", group_no, ret_block);
 
-		if (ext3_test_allocatable(j, bh)) {
+		if (ext3_test_allocatable(ret_block, bitmap_bh)) {
 #ifdef EXT3FS_DEBUG
 			goal_hits++;
-			ext3_debug ("goal bit allocated.\n");
+			ext3_debug("goal bit allocated.\n");
 #endif
 			goto got_block;
 		}
 
-		j = find_next_usable_block(j, bh, EXT3_BLOCKS_PER_GROUP(sb));
-		if (j >= 0)
+		ret_block = find_next_usable_block(ret_block, bitmap_bh,
+				EXT3_BLOCKS_PER_GROUP(sb));
+		if (ret_block >= 0)
 			goto search_back;
 	}
 
-	ext3_debug ("Bit not found in block group %d.\n", i);
+	ext3_debug("Bit not found in block group %d.\n", group_no);
 
 	/*
 	 * Now search the rest of the groups.  We assume that 
 	 * i and gdp correctly point to the last group visited.
 	 */
-	for (k = 0; k < sb->u.ext3_sb.s_groups_count; k++) {
-		i++;
-		if (i >= sb->u.ext3_sb.s_groups_count)
-			i = 0;
-		gdp = ext3_get_group_desc (sb, i, &bh2);
+	for (bit = 0; bit < sb->u.ext3_sb.s_groups_count; bit++) {
+		group_no++;
+		if (group_no >= sb->u.ext3_sb.s_groups_count)
+			group_no = 0;
+		gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
 		if (!gdp) {
 			*errp = -EIO;
 			goto out;
 		}
 		if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
-			bitmap_nr = load_block_bitmap (sb, i);
-			if (bitmap_nr < 0)
+			brelse(bitmap_bh);
+			bitmap_bh = read_block_bitmap(sb, group_no);
+			if (!bitmap_bh)
 				goto io_error;
-	
-			bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
-			j = find_next_usable_block(-1, bh, 
+			ret_block = find_next_usable_block(-1, bitmap_bh, 
 						   EXT3_BLOCKS_PER_GROUP(sb));
-			if (j >= 0) 
+			if (ret_block >= 0) 
 				goto search_back;
 		}
 	}
@@ -653,47 +494,51 @@ search_back:
 	 * bitmap.  Now search backwards up to 7 bits to find the
 	 * start of this group of free blocks.
 	 */
-	for (	k = 0;
-		k < 7 && j > 0 && ext3_test_allocatable(j - 1, bh);
-		k++, j--)
+	for (	bit = 0;
+		bit < 7 && ret_block > 0 &&
+			ext3_test_allocatable(ret_block - 1, bitmap_bh);
+		bit++, ret_block--)
 		;
 	
 got_block:
 
-	ext3_debug ("using block group %d(%d)\n", i, gdp->bg_free_blocks_count);
+	ext3_debug("using block group %d(%d)\n", group_no, gdp->bg_free_blocks_count);
 
 	/* Make sure we use undo access for the bitmap, because it is
            critical that we do the frozen_data COW on bitmap buffers in
            all cases even if the buffer is in BJ_Forget state in the
            committing transaction.  */
-	BUFFER_TRACE(bh, "get undo access for marking new block");
-	fatal = ext3_journal_get_undo_access(handle, bh);
-	if (fatal) goto out;
+	BUFFER_TRACE(bitmap_bh, "get undo access for marking new block");
+	fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
+	if (fatal)
+		goto out;
 	
-	BUFFER_TRACE(bh2, "get_write_access");
-	fatal = ext3_journal_get_write_access(handle, bh2);
-	if (fatal) goto out;
+	BUFFER_TRACE(gdp_bh, "get_write_access");
+	fatal = ext3_journal_get_write_access(handle, gdp_bh);
+	if (fatal)
+		goto out;
 
 	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
 	fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
-	if (fatal) goto out;
+	if (fatal)
+		goto out;
 
-	tmp = j + i * EXT3_BLOCKS_PER_GROUP(sb)
+	target_block = ret_block + group_no * EXT3_BLOCKS_PER_GROUP(sb)
 				+ le32_to_cpu(es->s_first_data_block);
 
-	if (tmp == le32_to_cpu(gdp->bg_block_bitmap) ||
-	    tmp == le32_to_cpu(gdp->bg_inode_bitmap) ||
-	    in_range (tmp, le32_to_cpu(gdp->bg_inode_table),
+	if (target_block == le32_to_cpu(gdp->bg_block_bitmap) ||
+	    target_block == le32_to_cpu(gdp->bg_inode_bitmap) ||
+	    in_range(target_block, le32_to_cpu(gdp->bg_inode_table),
 		      sb->u.ext3_sb.s_itb_per_group))
-		ext3_error (sb, "ext3_new_block",
+		ext3_error(sb, "ext3_new_block",
 			    "Allocating block in system zone - "
-			    "block = %u", tmp);
+			    "block = %u", target_block);
 
 	/* The superblock lock should guard against anybody else beating
 	 * us to this point! */
-	J_ASSERT_BH(bh, !ext3_test_bit(j, bh->b_data));
-	BUFFER_TRACE(bh, "setting bitmap bit");
-	ext3_set_bit(j, bh->b_data);
+	J_ASSERT_BH(bitmap_bh, !ext3_test_bit(ret_block, bitmap_bh->b_data));
+	BUFFER_TRACE(bitmap_bh, "setting bitmap bit");
+	ext3_set_bit(ret_block, bitmap_bh->b_data);
 	performed_allocation = 1;
 
 #ifdef CONFIG_JBD_DEBUG
@@ -701,80 +546,33 @@ got_block:
 		struct buffer_head *debug_bh;
 
 		/* Record bitmap buffer state in the newly allocated block */
-		debug_bh = sb_find_get_block(sb, tmp);
+		debug_bh = sb_find_get_block(sb, target_block);
 		if (debug_bh) {
 			BUFFER_TRACE(debug_bh, "state when allocated");
-			BUFFER_TRACE2(debug_bh, bh, "bitmap state");
+			BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
 			brelse(debug_bh);
 		}
 	}
 #endif
-	if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data)
-		J_ASSERT_BH(bh, !ext3_test_bit(j, bh2jh(bh)->b_committed_data));
-	bhtmp = bh;
-	alloctmp = j;
-
-	ext3_debug ("found bit %d\n", j);
-
-	/*
-	 * Do block preallocation now if required.
-	 */
-#ifdef EXT3_PREALLOCATE
-	/*
-	 * akpm: this is not enabled for ext3.  Need to use
-	 * ext3_test_allocatable()
-	 */
-	/* Writer: ->i_prealloc* */
-	if (prealloc_count && !*prealloc_count) {
-		int	prealloc_goal;
-		unsigned long next_block = tmp + 1;
-
-		prealloc_goal = es->s_prealloc_blocks ?
-			es->s_prealloc_blocks : EXT3_DEFAULT_PREALLOC_BLOCKS;
-
-		*prealloc_block = next_block;
-		/* Writer: end */
-		for (k = 1;
-		     k < prealloc_goal && (j + k) < EXT3_BLOCKS_PER_GROUP(sb);
-		     k++, next_block++) {
-			if (DQUOT_PREALLOC_BLOCK(inode, 1))
-				break;
-			/* Writer: ->i_prealloc* */
-			if (*prealloc_block + *prealloc_count != next_block ||
-			    ext3_set_bit (j + k, bh->b_data)) {
-				/* Writer: end */
-				DQUOT_FREE_BLOCK(inode, 1);
- 				break;
-			}
-			(*prealloc_count)++;
-			/* Writer: end */
-		}	
-		/*
-		 * As soon as we go for per-group spinlocks we'll need these
-		 * done inside the loop above.
-		 */
-		gdp->bg_free_blocks_count =
-			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
-			       (k - 1));
-		es->s_free_blocks_count =
-			cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) -
-			       (k - 1));
-		ext3_debug ("Preallocated a further %lu bits.\n",
-			       (k - 1));
-	}
-#endif
+	if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data)
+		J_ASSERT_BH(bitmap_bh,
+			!ext3_test_bit(ret_block,
+					bh2jh(bitmap_bh)->b_committed_data));
+	ext3_debug("found bit %d\n", ret_block);
 
-	j = tmp;
+	/* ret_block was blockgroup-relative.  Now it becomes fs-relative */
+	ret_block = target_block;
 
-	BUFFER_TRACE(bh, "journal_dirty_metadata for bitmap block");
-	err = ext3_journal_dirty_metadata(handle, bh);
-	if (!fatal) fatal = err;
+	BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for bitmap block");
+	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
+	if (!fatal)
+		fatal = err;
 	
-	if (j >= le32_to_cpu(es->s_blocks_count)) {
-		ext3_error (sb, "ext3_new_block",
+	if (ret_block >= le32_to_cpu(es->s_blocks_count)) {
+		ext3_error(sb, "ext3_new_block",
 			    "block(%d) >= blocks count(%d) - "
-			    "block_group = %d, es == %p ",j,
-			le32_to_cpu(es->s_blocks_count), i, es);
+			    "block_group = %d, es == %p ", ret_block,
+			le32_to_cpu(es->s_blocks_count), group_no, es);
 		goto out;
 	}
 
@@ -783,30 +581,33 @@ got_block:
 	 * list of some description.  We don't know in advance whether
 	 * the caller wants to use it as metadata or data.
 	 */
-
-	ext3_debug ("allocating block %d. "
-		    "Goal hits %d of %d.\n", j, goal_hits, goal_attempts);
+	ext3_debug("allocating block %d. Goal hits %d of %d.\n",
+			ret_block, goal_hits, goal_attempts);
 
 	gdp->bg_free_blocks_count =
 			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
 	es->s_free_blocks_count =
 			cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - 1);
 
-	BUFFER_TRACE(bh2, "journal_dirty_metadata for group descriptor");
-	err = ext3_journal_dirty_metadata(handle, bh2);
-	if (!fatal) fatal = err;
-	
-	BUFFER_TRACE(bh, "journal_dirty_metadata for superblock");
+	BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
+	err = ext3_journal_dirty_metadata(handle, gdp_bh);
+	if (!fatal)
+		fatal = err;
+
+	BUFFER_TRACE(sb->u.ext3_sb.s_sbh,
+			"journal_dirty_metadata for superblock");
 	err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
-	if (!fatal) fatal = err;
+	if (!fatal)
+		fatal = err;
 
 	sb->s_dirt = 1;
 	if (fatal)
 		goto out;
 
-	unlock_super (sb);
+	unlock_super(sb);
 	*errp = 0;
-	return j;
+	brelse(bitmap_bh);
+	return ret_block;
 	
 io_error:
 	*errp = -EIO;
@@ -815,55 +616,57 @@ out:
 		*errp = fatal;
 		ext3_std_error(sb, fatal);
 	}
-	unlock_super (sb);
+	unlock_super(sb);
 	/*
 	 * Undo the block allocation
 	 */
 	if (!performed_allocation)
 		DQUOT_FREE_BLOCK(inode, 1);
+	brelse(bitmap_bh);
 	return 0;
 	
 }
 
-unsigned long ext3_count_free_blocks (struct super_block * sb)
+unsigned long ext3_count_free_blocks(struct super_block *sb)
 {
 #ifdef EXT3FS_DEBUG
-	struct ext3_super_block * es;
+	struct ext3_super_block *es;
 	unsigned long desc_count, bitmap_count, x;
-	int bitmap_nr;
-	struct ext3_group_desc * gdp;
+	struct buffer_head *bitmap_bh = NULL;
+	struct ext3_group_desc *gdp;
 	int i;
 	
-	lock_super (sb);
+	lock_super(sb);
 	es = sb->u.ext3_sb.s_es;
 	desc_count = 0;
 	bitmap_count = 0;
 	gdp = NULL;
 	for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
-		gdp = ext3_get_group_desc (sb, i, NULL);
+		gdp = ext3_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
 		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
-		bitmap_nr = load_block_bitmap (sb, i);
-		if (bitmap_nr < 0)
+		brelse(bitmap_bh);
+		bitmap_bh = read_block_bitmap(sb, i);
+		if (bitmap_bh == NULL)
 			continue;
 		
-		x = ext3_count_free (sb->u.ext3_sb.s_block_bitmap[bitmap_nr],
-				     sb->s_blocksize);
-		printk ("group %d: stored = %d, counted = %lu\n",
+		x = ext3_count_free(bitmap_bh, sb->s_blocksize);
+		printk("group %d: stored = %d, counted = %lu\n",
 			i, le16_to_cpu(gdp->bg_free_blocks_count), x);
 		bitmap_count += x;
 	}
+	brelse(bitmap_bh);
 	printk("ext3_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
 	       le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count);
-	unlock_super (sb);
+	unlock_super(sb);
 	return bitmap_count;
 #else
 	return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count);
 #endif
 }
 
-static inline int block_in_use (unsigned long block,
+static inline int block_in_use(unsigned long block,
 				struct super_block * sb,
 				unsigned char * map)
 {
@@ -928,12 +731,11 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
 /* Called at mount-time, super-block is locked */
 void ext3_check_blocks_bitmap (struct super_block * sb)
 {
-	struct buffer_head * bh;
-	struct ext3_super_block * es;
+	struct ext3_super_block *es;
 	unsigned long desc_count, bitmap_count, x, j;
 	unsigned long desc_blocks;
-	int bitmap_nr;
-	struct ext3_group_desc * gdp;
+	struct buffer_head *bitmap_bh = NULL;
+	struct ext3_group_desc *gdp;
 	int i;
 
 	es = sb->u.ext3_sb.s_es;
@@ -945,43 +747,43 @@ void ext3_check_blocks_bitmap (struct super_block * sb)
 		if (!gdp)
 			continue;
 		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
-		bitmap_nr = load_block_bitmap (sb, i);
-		if (bitmap_nr < 0)
+		brelse(bitmap_bh);
+		bitmap_bh = read_block_bitmap(sb, i);
+		if (bitmap_bh == NULL)
 			continue;
 
-		bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];
-
-		if (ext3_bg_has_super(sb, i) && !ext3_test_bit(0, bh->b_data))
+		if (ext3_bg_has_super(sb, i) &&
+				!ext3_test_bit(0, bitmap_bh->b_data))
 			ext3_error(sb, __FUNCTION__,
 				   "Superblock in group %d is marked free", i);
 
 		desc_blocks = ext3_bg_num_gdb(sb, i);
 		for (j = 0; j < desc_blocks; j++)
-			if (!ext3_test_bit(j + 1, bh->b_data))
+			if (!ext3_test_bit(j + 1, bitmap_bh->b_data))
 				ext3_error(sb, __FUNCTION__,
 					   "Descriptor block #%ld in group "
 					   "%d is marked free", j, i);
 
 		if (!block_in_use (le32_to_cpu(gdp->bg_block_bitmap),
-						sb, bh->b_data))
+						sb, bitmap_bh->b_data))
 			ext3_error (sb, "ext3_check_blocks_bitmap",
 				    "Block bitmap for group %d is marked free",
 				    i);
 
 		if (!block_in_use (le32_to_cpu(gdp->bg_inode_bitmap),
-						sb, bh->b_data))
+						sb, bitmap_bh->b_data))
 			ext3_error (sb, "ext3_check_blocks_bitmap",
 				    "Inode bitmap for group %d is marked free",
 				    i);
 
 		for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++)
 			if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
-							sb, bh->b_data))
+							sb, bitmap_bh->b_data))
 				ext3_error (sb, "ext3_check_blocks_bitmap",
 					    "Block #%d of the inode table in "
 					    "group %d is marked free", j, i);
 
-		x = ext3_count_free (bh, sb->s_blocksize);
+		x = ext3_count_free(bitmap_bh, sb->s_blocksize);
 		if (le16_to_cpu(gdp->bg_free_blocks_count) != x)
 			ext3_error (sb, "ext3_check_blocks_bitmap",
 				    "Wrong free blocks count for group %d, "
@@ -989,6 +791,7 @@ void ext3_check_blocks_bitmap (struct super_block * sb)
 				    le16_to_cpu(gdp->bg_free_blocks_count), x);
 		bitmap_count += x;
 	}
+	brelse(bitmap_bh);
 	if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
 		ext3_error (sb, "ext3_check_blocks_bitmap",
 			"Wrong free blocks count in super block, "
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 20c60a813704..3cf6c555a1c9 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -36,8 +36,7 @@
  *
  * The file system contains group descriptors which are located after the
  * super block.  Each descriptor contains the number of the bitmap block and
- * the free blocks count in the block.  The descriptors are loaded in memory
- * when a file system is mounted (see ext3_read_super).
+ * the free blocks count in the block.
  */
 
 
@@ -45,118 +44,26 @@
  * Read the inode allocation bitmap for a given block_group, reading
  * into the specified slot in the superblock's bitmap cache.
  *
- * Return >=0 on success or a -ve error code.
+ * Return buffer_head of bitmap on success or NULL.
  */
-static int read_inode_bitmap (struct super_block * sb,
-			       unsigned long block_group,
-			       unsigned int bitmap_nr)
+static struct buffer_head *
+read_inode_bitmap(struct super_block * sb, unsigned long block_group)
 {
-	struct ext3_group_desc * gdp;
-	struct buffer_head * bh = NULL;
-	int retval = 0;
+	struct ext3_group_desc *desc;
+	struct buffer_head *bh = NULL;
 
-	gdp = ext3_get_group_desc (sb, block_group, NULL);
-	if (!gdp) {
-		retval = -EIO;
+	desc = ext3_get_group_desc(sb, block_group, NULL);
+	if (!desc)
 		goto error_out;
-	}
-	bh = sb_bread(sb, le32_to_cpu(gdp->bg_inode_bitmap));
-	if (!bh) {
-		ext3_error (sb, "read_inode_bitmap",
+
+	bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
+	if (!bh)
+		ext3_error(sb, "read_inode_bitmap",
 			    "Cannot read inode bitmap - "
 			    "block_group = %lu, inode_bitmap = %lu",
-			    block_group, (unsigned long) gdp->bg_inode_bitmap);
-		retval = -EIO;
-	}
-	/*
-	 * On IO error, just leave a zero in the superblock's block pointer for
-	 * this group.  The IO will be retried next time.
-	 */
+			    block_group, (unsigned long) desc->bg_inode_bitmap);
 error_out:
-	sb->u.ext3_sb.s_inode_bitmap_number[bitmap_nr] = block_group;
-	sb->u.ext3_sb.s_inode_bitmap[bitmap_nr] = bh;
-	return retval;
-}
-
-/*
- * load_inode_bitmap loads the inode bitmap for a blocks group
- *
- * It maintains a cache for the last bitmaps loaded.  This cache is managed
- * with a LRU algorithm.
- *
- * Notes:
- * 1/ There is one cache per mounted file system.
- * 2/ If the file system contains less than EXT3_MAX_GROUP_LOADED groups,
- *    this function reads the bitmap without maintaining a LRU cache.
- *
- * Return the slot used to store the bitmap, or a -ve error code.
- */
-static int load_inode_bitmap (struct super_block * sb,
-			      unsigned int block_group)
-{
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
-	unsigned long inode_bitmap_number;
-	struct buffer_head * inode_bitmap;
-	int i, j, retval = 0;
-
-	if (block_group >= sbi->s_groups_count)
-		ext3_panic (sb, "load_inode_bitmap",
-			    "block_group >= groups_count - "
-			    "block_group = %d, groups_count = %lu",
-			    block_group, sbi->s_groups_count);
-	if (sbi->s_loaded_inode_bitmaps > 0 &&
-	    sbi->s_inode_bitmap_number[0] == block_group &&
-	    sbi->s_inode_bitmap[0] != NULL)
-		return 0;
-	if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED) {
-		if (sbi->s_inode_bitmap[block_group]) {
-			if (sbi->s_inode_bitmap_number[block_group] !=
-						block_group)
-				ext3_panic(sb, "load_inode_bitmap",
-					"block_group != inode_bitmap_number");
-			return block_group;
-		}
-		retval = read_inode_bitmap(sb, block_group, block_group);
-		if (retval < 0)
-			return retval;
-		return block_group;
-	}
-
-	for (i = 0; i < sbi->s_loaded_inode_bitmaps &&
-		    sbi->s_inode_bitmap_number[i] != block_group; i++)
-		/* do nothing */;
-	if (i < sbi->s_loaded_inode_bitmaps &&
-	    sbi->s_inode_bitmap_number[i] == block_group) {
-		inode_bitmap_number = sbi->s_inode_bitmap_number[i];
-		inode_bitmap = sbi->s_inode_bitmap[i];
-		for (j = i; j > 0; j--) {
-			sbi->s_inode_bitmap_number[j] =
-				sbi->s_inode_bitmap_number[j - 1];
-			sbi->s_inode_bitmap[j] = sbi->s_inode_bitmap[j - 1];
-		}
-		sbi->s_inode_bitmap_number[0] = inode_bitmap_number;
-		sbi->s_inode_bitmap[0] = inode_bitmap;
-
-		/*
-		 * There's still one special case here --- if inode_bitmap == 0
-		 * then our last attempt to read the bitmap failed and we have
-		 * just ended up caching that failure.  Try again to read it.
-		 */
-		if (!inode_bitmap)
-			retval = read_inode_bitmap (sb, block_group, 0);
-	} else {
-		if (sbi->s_loaded_inode_bitmaps < EXT3_MAX_GROUP_LOADED)
-			sbi->s_loaded_inode_bitmaps++;
-		else
-			brelse(sbi->s_inode_bitmap[EXT3_MAX_GROUP_LOADED - 1]);
-		for (j = sbi->s_loaded_inode_bitmaps - 1; j > 0; j--) {
-			sbi->s_inode_bitmap_number[j] =
-				sbi->s_inode_bitmap_number[j - 1];
-			sbi->s_inode_bitmap[j] = sbi->s_inode_bitmap[j - 1];
-		}
-		retval = read_inode_bitmap (sb, block_group, 0);
-	}
-	return retval;
+	return bh;
 }
 
 /*
@@ -180,11 +87,10 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 	struct super_block * sb = inode->i_sb;
 	int is_directory;
 	unsigned long ino;
-	struct buffer_head * bh;
-	struct buffer_head * bh2;
+	struct buffer_head *bitmap_bh = NULL;
+	struct buffer_head *bh2;
 	unsigned long block_group;
 	unsigned long bit;
-	int bitmap_nr;
 	struct ext3_group_desc * gdp;
 	struct ext3_super_block * es;
 	int fatal = 0, err;
@@ -229,19 +135,17 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 	}
 	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
 	bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
-	bitmap_nr = load_inode_bitmap (sb, block_group);
-	if (bitmap_nr < 0)
+	bitmap_bh = read_inode_bitmap(sb, block_group);
+	if (!bitmap_bh)
 		goto error_return;
 
-	bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
-
-	BUFFER_TRACE(bh, "get_write_access");
-	fatal = ext3_journal_get_write_access(handle, bh);
+	BUFFER_TRACE(bitmap_bh, "get_write_access");
+	fatal = ext3_journal_get_write_access(handle, bitmap_bh);
 	if (fatal)
 		goto error_return;
 
 	/* Ok, now we can actually update the inode bitmaps.. */
-	if (!ext3_clear_bit (bit, bh->b_data))
+	if (!ext3_clear_bit(bit, bitmap_bh->b_data))
 		ext3_error (sb, "ext3_free_inode",
 			      "bit already cleared for inode %lu", ino);
 	else {
@@ -272,12 +176,13 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 		err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
 		if (!fatal) fatal = err;
 	}
-	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-	err = ext3_journal_dirty_metadata(handle, bh);
+	BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
+	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
 	if (!fatal)
 		fatal = err;
 	sb->s_dirt = 1;
 error_return:
+	brelse(bitmap_bh);
 	ext3_std_error(sb, fatal);
 	unlock_super(sb);
 }
@@ -292,20 +197,19 @@ error_return:
  * For other inodes, search forward from the parent directory's block
  * group to find a free inode.
  */
-struct inode * ext3_new_inode (handle_t *handle,
-				struct inode * dir, int mode)
+struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
 {
-	struct super_block * sb;
-	struct buffer_head * bh;
-	struct buffer_head * bh2;
+	struct super_block *sb;
+	struct buffer_head *bitmap_bh = NULL;
+	struct buffer_head *bh2;
 	int i, j, avefreei;
 	struct inode * inode;
-	int bitmap_nr;
 	struct ext3_group_desc * gdp;
 	struct ext3_group_desc * tmp;
 	struct ext3_super_block * es;
 	struct ext3_inode_info *ei;
 	int err = 0;
+	struct inode *ret;
 
 	/* Cannot create files in a deleted directory */
 	if (!dir || !dir->i_nlink)
@@ -392,26 +296,25 @@ repeat:
 		goto out;
 
 	err = -EIO;
-	bitmap_nr = load_inode_bitmap (sb, i);
-	if (bitmap_nr < 0)
+	brelse(bitmap_bh);
+	bitmap_bh = read_inode_bitmap(sb, i);
+	if (!bitmap_bh)
 		goto fail;
 
-	bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
-
-	if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data,
+	if ((j = ext3_find_first_zero_bit((unsigned long *)bitmap_bh->b_data,
 				      EXT3_INODES_PER_GROUP(sb))) <
 	    EXT3_INODES_PER_GROUP(sb)) {
-		BUFFER_TRACE(bh, "get_write_access");
-		err = ext3_journal_get_write_access(handle, bh);
+		BUFFER_TRACE(bitmap_bh, "get_write_access");
+		err = ext3_journal_get_write_access(handle, bitmap_bh);
 		if (err) goto fail;
 		
-		if (ext3_set_bit (j, bh->b_data)) {
+		if (ext3_set_bit(j, bitmap_bh->b_data)) {
 			ext3_error (sb, "ext3_new_inode",
 				      "bit already set for inode %d", j);
 			goto repeat;
 		}
-		BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-		err = ext3_journal_dirty_metadata(handle, bh);
+		BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
+		err = ext3_journal_dirty_metadata(handle, bitmap_bh);
 		if (err) goto fail;
 	} else {
 		if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) {
@@ -520,23 +423,27 @@ repeat:
 	err = ext3_mark_inode_dirty(handle, inode);
 	if (err) goto fail;
 	
-	unlock_super (sb);
+	unlock_super(sb);
+	ret = inode;
 	if(DQUOT_ALLOC_INODE(inode)) {
 		DQUOT_DROP(inode);
 		inode->i_flags |= S_NOQUOTA;
 		inode->i_nlink = 0;
 		iput(inode);
-		return ERR_PTR(-EDQUOT);
+		ret = ERR_PTR(-EDQUOT);
+	} else {
+		ext3_debug("allocating inode %lu\n", inode->i_ino);
 	}
-	ext3_debug ("allocating inode %lu\n", inode->i_ino);
-	return inode;
-
+	goto really_out;
 fail:
 	ext3_std_error(sb, err);
 out:
 	unlock_super(sb);
 	iput(inode);
-	return ERR_PTR(err);
+	ret = ERR_PTR(err);
+really_out:
+	brelse(bitmap_bh);
+	return ret;
 }
 
 /* Verify that we are loading a valid orphan from disk */
@@ -545,36 +452,37 @@ struct inode *ext3_orphan_get (struct super_block * sb, ino_t ino)
 	ino_t max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
 	unsigned long block_group;
 	int bit;
-	int bitmap_nr;
-	struct buffer_head *bh;
+	struct buffer_head *bitmap_bh = NULL;
 	struct inode *inode = NULL;
 	
 	/* Error cases - e2fsck has already cleaned up for us */
 	if (ino > max_ino) {
 		ext3_warning(sb, __FUNCTION__,
 			     "bad orphan ino %ld!  e2fsck was run?\n", ino);
-		return NULL;
+		goto out;
 	}
 
 	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
 	bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
-	if ((bitmap_nr = load_inode_bitmap(sb, block_group)) < 0 ||
-	    !(bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr])) {
+	bitmap_bh = read_inode_bitmap(sb, block_group);
+	if (!bitmap_bh) {
 		ext3_warning(sb, __FUNCTION__,
 			     "inode bitmap error for orphan %ld\n", ino);
-		return NULL;
+		goto out;
 	}
 
 	/* Having the inode bit set should be a 100% indicator that this
 	 * is a valid orphan (no e2fsck run on fs).  Orphans also include
 	 * inodes that were being truncated, so we can't check i_nlink==0.
 	 */
-	if (!ext3_test_bit(bit, bh->b_data) || !(inode = iget(sb, ino)) ||
-	    is_bad_inode(inode) || NEXT_ORPHAN(inode) > max_ino) {
+	if (!ext3_test_bit(bit, bitmap_bh->b_data) ||
+			!(inode = iget(sb, ino)) || is_bad_inode(inode) ||
+			NEXT_ORPHAN(inode) > max_ino) {
 		ext3_warning(sb, __FUNCTION__,
 			     "bad orphan inode %ld!  e2fsck was run?\n", ino);
 		printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%ld) = %d\n",
-		       bit, bh->b_blocknr, ext3_test_bit(bit, bh->b_data));
+			bit, bitmap_bh->b_blocknr,
+			ext3_test_bit(bit, bitmap_bh->b_data));
 		printk(KERN_NOTICE "inode=%p\n", inode);
 		if (inode) {
 			printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
@@ -587,19 +495,20 @@ struct inode *ext3_orphan_get (struct super_block * sb, ino_t ino)
 		if (inode && inode->i_nlink == 0)
 			inode->i_blocks = 0;
 		iput(inode);
-		return NULL;
+		inode = NULL;
 	}
-
+out:
+	brelse(bitmap_bh);
 	return inode;
 }
 
 unsigned long ext3_count_free_inodes (struct super_block * sb)
 {
 #ifdef EXT3FS_DEBUG
-	struct ext3_super_block * es;
+	struct ext3_super_block *es;
 	unsigned long desc_count, bitmap_count, x;
-	int bitmap_nr;
-	struct ext3_group_desc * gdp;
+	struct ext3_group_desc *gdp;
+	struct buffer_head *bitmap_bh = NULL;
 	int i;
 
 	lock_super (sb);
@@ -612,19 +521,20 @@ unsigned long ext3_count_free_inodes (struct super_block * sb)
 		if (!gdp)
 			continue;
 		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
-		bitmap_nr = load_inode_bitmap (sb, i);
-		if (bitmap_nr < 0)
+		brelse(bitmap_bh);
+		bitmap_bh = read_inode_bitmap(sb, i);
+		if (!bitmap_bh)
 			continue;
 
-		x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr],
-				     EXT3_INODES_PER_GROUP(sb) / 8);
-		printk ("group %d: stored = %d, counted = %lu\n",
+		x = ext3_count_free(bitmap_bh, EXT3_INODES_PER_GROUP(sb) / 8);
+		printk("group %d: stored = %d, counted = %lu\n",
 			i, le16_to_cpu(gdp->bg_free_inodes_count), x);
 		bitmap_count += x;
 	}
+	brelse(bitmap_bh);
 	printk("ext3_count_free_inodes: stored = %lu, computed = %lu, %lu\n",
 		le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
-	unlock_super (sb);
+	unlock_super(sb);
 	return desc_count;
 #else
 	return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_inodes_count);
@@ -637,7 +547,7 @@ void ext3_check_inodes_bitmap (struct super_block * sb)
 {
 	struct ext3_super_block * es;
 	unsigned long desc_count, bitmap_count, x;
-	int bitmap_nr;
+	struct buffer_head *bitmap_bh = NULL;
 	struct ext3_group_desc * gdp;
 	int i;
 
@@ -650,12 +560,12 @@ void ext3_check_inodes_bitmap (struct super_block * sb)
 		if (!gdp)
 			continue;
 		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
-		bitmap_nr = load_inode_bitmap (sb, i);
-		if (bitmap_nr < 0)
+		brelse(bitmap_bh);
+		bitmap_bh = read_inode_bitmap(sb, i);
+		if (!bitmap_bh)
 			continue;
 
-		x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr],
-				     EXT3_INODES_PER_GROUP(sb) / 8);
+		x = ext3_count_free(bitmap_bh, EXT3_INODES_PER_GROUP(sb) / 8);
 		if (le16_to_cpu(gdp->bg_free_inodes_count) != x)
 			ext3_error (sb, "ext3_check_inodes_bitmap",
 				    "Wrong free inodes count in group %d, "
@@ -663,6 +573,7 @@ void ext3_check_inodes_bitmap (struct super_block * sb)
 				    le16_to_cpu(gdp->bg_free_inodes_count), x);
 		bitmap_count += x;
 	}
+	brelse(bitmap_bh);
 	if (le32_to_cpu(es->s_free_inodes_count) != bitmap_count)
 		ext3_error (sb, "ext3_check_inodes_bitmap",
 			    "Wrong free inodes count in super block, "
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6e166d31137b..f08897e90ad2 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -417,10 +417,6 @@ void ext3_put_super (struct super_block * sb)
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(sbi->s_group_desc[i]);
 	kfree(sbi->s_group_desc);
-	for (i = 0; i < EXT3_MAX_GROUP_LOADED; i++)
-		brelse(sbi->s_inode_bitmap[i]);
-	for (i = 0; i < EXT3_MAX_GROUP_LOADED; i++)
-		brelse(sbi->s_block_bitmap[i]);
 	brelse(sbi->s_sbh);
 
 	/* Debugging code just in case the in-memory inode orphan list
@@ -1150,14 +1146,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		printk (KERN_ERR "EXT3-fs: group descriptors corrupted !\n");
 		goto failed_mount2;
 	}
-	for (i = 0; i < EXT3_MAX_GROUP_LOADED; i++) {
-		sbi->s_inode_bitmap_number[i] = 0;
-		sbi->s_inode_bitmap[i] = NULL;
-		sbi->s_block_bitmap_number[i] = 0;
-		sbi->s_block_bitmap[i] = NULL;
-	}
-	sbi->s_loaded_inode_bitmaps = 0;
-	sbi->s_loaded_block_bitmaps = 0;
 	sbi->s_gdb_count = db_count;
 	/*
 	 * set up enough so that it can read an inode
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index 6bda514a7a54..573aa3588735 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -21,14 +21,6 @@
 #include <linux/wait.h>
 #endif
 
-/*
- * The following is not needed anymore since the descriptors buffer
- * heads are now dynamically allocated
- */
-/* #define EXT3_MAX_GROUP_DESC	8 */
-
-#define EXT3_MAX_GROUP_LOADED	8
-
 /*
  * third extended-fs super-block data in memory
  */
@@ -46,12 +38,6 @@ struct ext3_sb_info {
 	struct buffer_head * s_sbh;	/* Buffer containing the super block */
 	struct ext3_super_block * s_es;	/* Pointer to the super block in the buffer */
 	struct buffer_head ** s_group_desc;
-	unsigned short s_loaded_inode_bitmaps;
-	unsigned short s_loaded_block_bitmaps;
-	unsigned long s_inode_bitmap_number[EXT3_MAX_GROUP_LOADED];
-	struct buffer_head * s_inode_bitmap[EXT3_MAX_GROUP_LOADED];
-	unsigned long s_block_bitmap_number[EXT3_MAX_GROUP_LOADED];
-	struct buffer_head * s_block_bitmap[EXT3_MAX_GROUP_LOADED];
 	unsigned long  s_mount_opt;
 	uid_t s_resuid;
 	gid_t s_resgid;
-- 
cgit v1.2.3


From 5226cca6c08884dd98caad07967fc79a5c7cad9e Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@zip.com.au>
Date: Thu, 4 Jul 2002 08:30:25 -0700
Subject: [PATCH] debug check for leaked blockdev buffers

Having just fiddled with the refcounts of blockdev buffers, I want some
way of assuring that the code is correct and is not leaking
buffer_heads.

There's no easy way to do this: if a blockdev page has pinned buffers
then truncate_complete_page just cuts it loose and we leak memory.

The patch adds a bit of debug code to catch these leaks.  This code,
PF_RADIX_TREE and buffer_error() need to be removed later on.
---
 fs/buffer.c           | 2 ++
 include/linux/sched.h | 2 +-
 mm/filemap.c          | 7 ++++++-
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index d46b55b0cf2c..6ba5de4d2051 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -467,7 +467,9 @@ void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
 	 * We really want to use invalidate_inode_pages2() for
 	 * that, but not until that's cleaned up.
 	 */
+	current->flags |= PF_INVALIDATE;
 	invalidate_inode_pages(bdev->bd_inode);
+	current->flags &= ~PF_INVALIDATE;
 }
 
 void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 390627c2f1f6..90fb402ee802 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -391,7 +391,7 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
 #define PF_FREEZE	0x00010000	/* this task should be frozen for suspend */
 #define PF_IOTHREAD	0x00020000	/* this thread is needed for doing I/O to swap */
 #define PF_FROZEN	0x00040000	/* frozen for system suspend */
-
+#define PF_INVALIDATE	0x00080000	/* debug: unmounting an fs. killme. */
 /*
  * Ptrace flags
  */
diff --git a/mm/filemap.c b/mm/filemap.c
index a31fbce9e196..86ea3c15e6e8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -177,8 +177,13 @@ static inline void truncate_partial_page(struct page *page, unsigned partial)
 static void truncate_complete_page(struct page *page)
 {
 	/* Leave it on the LRU if it gets converted into anonymous buffers */
-	if (!PagePrivate(page) || do_invalidatepage(page, 0))
+	if (!PagePrivate(page) || do_invalidatepage(page, 0)) {
 		lru_cache_del(page);
+	} else {
+		if (current->flags & PF_INVALIDATE)
+			printk("%s: buffer heads were leaked\n",
+				current->comm);
+	}
 	ClearPageDirty(page);
 	ClearPageUptodate(page);
 	remove_inode_page(page);
-- 
cgit v1.2.3


From 06be3a5e71ad8d5e55d8efc480c76142f0d982e2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@zip.com.au>
Date: Thu, 4 Jul 2002 08:30:30 -0700
Subject: [PATCH] misc cleanups and fixes

- Comment and documentation fixlets

- Remove some unneeded fields from swapper_inode (these are a
  leftover from when I had swap using the filesystem IO functions).

- fix a printk bug in pci/pool.c: when dma_addr_t is 64 bit it
  generates a compile warning, and will print out garbage.  Cast it to
  unsigned long long.

- Convert some writeback #defines into enums (Steven Augart)
---
 Documentation/filesystems/porting |  4 +++-
 drivers/pci/pool.c                |  8 ++++----
 fs/block_dev.c                    |  2 --
 fs/fs-writeback.c                 |  5 +++--
 include/linux/writeback.h         | 18 +++++++++++-------
 mm/page_io.c                      |  2 +-
 mm/swap_state.c                   |  7 -------
 7 files changed, 22 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 85281b6f4ff0..b88f356a5919 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -3,11 +3,13 @@ Changes since 2.5.0:
 --- 
 [recommended]
 
-New helpers: sb_bread(), sb_getblk(), sb_get_hash_table(), set_bh(),
+New helpers: sb_bread(), sb_getblk(), sb_find_get_block(), set_bh(),
 	sb_set_blocksize() and sb_min_blocksize().
 
 Use them.
 
+(sb_find_get_block() replaces 2.4's get_hash_table())
+
 --- 
 [recommended]
 
diff --git a/drivers/pci/pool.c b/drivers/pci/pool.c
index dec050f4b0d5..9465b0fc81f1 100644
--- a/drivers/pci/pool.c
+++ b/drivers/pci/pool.c
@@ -303,15 +303,15 @@ pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t dma)
 
 #ifdef	CONFIG_DEBUG_SLAB
 	if (((dma - page->dma) + (void *)page->vaddr) != vaddr) {
-		printk (KERN_ERR "pci_pool_free %s/%s, %p (bad vaddr)/%lx\n",
+		printk (KERN_ERR "pci_pool_free %s/%s, %p (bad vaddr)/%Lx\n",
 			pool->dev ? pool->dev->slot_name : NULL,
-			pool->name, vaddr, (unsigned long) dma);
+			pool->name, vaddr, (unsigned long long) dma);
 		return;
 	}
 	if (page->bitmap [map] & (1UL << block)) {
-		printk (KERN_ERR "pci_pool_free %s/%s, dma %x already free\n",
+		printk (KERN_ERR "pci_pool_free %s/%s, dma %Lx already free\n",
 			pool->dev ? pool->dev->slot_name : NULL,
-			pool->name, dma);
+			pool->name, (unsigned long long)dma);
 		return;
 	}
 	memset (vaddr, POOL_POISON_BYTE, pool->size);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 34f88ef66b0f..be2c8ef578de 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -23,8 +23,6 @@
 
 #include <asm/uaccess.h>
 
-#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
-
 static unsigned long max_block(struct block_device *bdev)
 {
 	unsigned int retval = ~0U;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1d695b939f2f..db84b9f8eec4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -287,8 +287,9 @@ out:
  *
  * This is a "memory cleansing" operation, not a "data integrity" operation.
  */
-void writeback_unlocked_inodes(int *nr_to_write, int sync_mode,
-				unsigned long *older_than_this)
+void writeback_unlocked_inodes(int *nr_to_write,
+			       enum writeback_sync_modes sync_mode,
+			       unsigned long *older_than_this)
 {
 	struct super_block *sb;
 
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a06b0f116ebd..1b2034aa5617 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -24,18 +24,22 @@ static inline int current_is_pdflush(void)
 /*
  * fs/fs-writeback.c
  */
-#define WB_SYNC_NONE	0	/* Don't wait on anything */
-#define WB_SYNC_LAST	1	/* Wait on the last-written mapping */
-#define WB_SYNC_ALL	2	/* Wait on every mapping */
-#define WB_SYNC_HOLD	3	/* Hold the inode on sb_dirty for sys_sync() */
-
-void writeback_unlocked_inodes(int *nr_to_write, int sync_mode,
-				unsigned long *older_than_this);
+enum writeback_sync_modes {
+	WB_SYNC_NONE =  0,	/* Don't wait on anything */
+	WB_SYNC_LAST =  1,	/* Wait on the last-written mapping */
+	WB_SYNC_ALL =   2,	/* Wait on every mapping */
+	WB_SYNC_HOLD =  3,	/* Hold the inode on sb_dirty for sys_sync() */
+};
+
+void writeback_unlocked_inodes(int *nr_to_write,
+			       enum writeback_sync_modes sync_mode,
+			       unsigned long *older_than_this);
 void wake_up_inode(struct inode *inode);
 void __wait_on_inode(struct inode * inode);
 void sync_inodes_sb(struct super_block *, int wait);
 void sync_inodes(int wait);
 
+/* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
 {
 	if (inode->i_state & I_LOCK)
diff --git a/mm/page_io.c b/mm/page_io.c
index b89925ee30ea..4a1ce82a5886 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -130,7 +130,7 @@ out:
  * swapper_space doesn't have a real inode, so it gets a special vm_writeback()
  * so we don't need swap special cases in generic_vm_writeback().
  *
- * Swap pages are PageLocked and PageWriteback while under writeout so that
+ * Swap pages are !PageLocked and PageWriteback while under writeout so that
  * memory allocators will throttle against them.
  */
 static int swap_vm_writeback(struct page *page, int *nr_to_write)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 4513649a1208..c831b5193865 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -21,16 +21,9 @@
 /*
  * swapper_inode doesn't do anything much.  It is really only here to
  * avoid some special-casing in other parts of the kernel.
- *
- * We set i_size to "infinity" to keep the page I/O functions happy.  The swap
- * block allocator makes sure that allocations are in-range.  A strange
- * number is chosen to prevent various arith overflows elsewhere.  For example,
- * `lblock' in block_read_full_page().
  */
 static struct inode swapper_inode = {
 	i_mapping:	&swapper_space,
-	i_size:		PAGE_SIZE * 0xffffffffLL,
-	i_blkbits:	PAGE_SHIFT,
 };
 
 extern struct address_space_operations swap_aops;
-- 
cgit v1.2.3


From f0e10c64f3e96dfa55db63407cf0add02fb94bf5 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@zip.com.au>
Date: Thu, 4 Jul 2002 08:30:34 -0700
Subject: [PATCH] pdflush cleanup

Writeback/pdflush cleanup patch from Steven Augart

* Exposes nr_pdflush_threads as /proc/sys/vm/nr_pdflush_threads, read-only.

  (I like this - I expect that management of the pdflush thread pool
  will be important for many-spindle machines, and this is a neat way
  of getting at the info).

* Adds minimum and maximum checking to four of the five writable pdflush
  and fs-writeback parameters (dirty_expire_centisecs stays on plain
  proc_dointvec).

* Minor indentation fix in sysctl.c

* mm/pdflush.c now includes linux/writeback.h, which prototypes
  pdflush_operation.  This is so that the compiler can
  automatically check that the prototype matches the definition.

* Adds a few comments to existing code.
---
 include/linux/sysctl.h    |  6 +++++-
 include/linux/writeback.h |  7 +++++++
 kernel/sysctl.c           | 42 ++++++++++++++++++++++++++++++++++--------
 mm/page-writeback.c       | 12 +++++++++---
 mm/pdflush.c              |  8 +++++++-
 5 files changed, 62 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 488bc05dbcc1..da8115ec58f6 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -30,7 +30,10 @@
 
 struct file;
 
-#define CTL_MAXNAME 10
+#define CTL_MAXNAME 10		/* how many path components do we allow in a
+				   call to sysctl?   In other words, what is
+				   the largest acceptable value for the nlen
+				   member of a struct __sysctl_args to have? */
 
 struct __sysctl_args {
 	int *name;
@@ -145,6 +148,7 @@ enum
 	VM_DIRTY_SYNC=13,	/* dirty_sync_ratio */
 	VM_DIRTY_WB_CS=14,	/* dirty_writeback_centisecs */
 	VM_DIRTY_EXPIRE_CS=15,	/* dirty_expire_centisecs */
+	VM_NR_PDFLUSH_THREADS=16, /* nr_pdflush_threads */
 };
 
 
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 1b2034aa5617..af3ec94cb2ad 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -49,15 +49,22 @@ static inline void wait_on_inode(struct inode *inode)
 /*
  * mm/page-writeback.c
  */
+/* These 5 are exported to sysctl. */
 extern int dirty_background_ratio;
 extern int dirty_async_ratio;
 extern int dirty_sync_ratio;
 extern int dirty_writeback_centisecs;
 extern int dirty_expire_centisecs;
 
+
 void balance_dirty_pages(struct address_space *mapping);
 void balance_dirty_pages_ratelimited(struct address_space *mapping);
 int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
 int do_writepages(struct address_space *mapping, int *nr_to_write);
 
+/* pdflush.c */
+extern int nr_pdflush_threads;	/* Global so it can be exported to sysctl
+				   read-only. */
+
+
 #endif		/* WRITEBACK_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f0c6215b1718..0c2c938efb65 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -258,6 +258,13 @@ static ctl_table kern_table[] = {
 	{0}
 };
 
+/* Constants for minimum and maximum testing in vm_table.
+   We use these as one-element integer vectors. */
+static int zero = 0;
+static int one = 1;
+static int one_hundred = 100;
+
+
 static ctl_table vm_table[] = {
 	{VM_OVERCOMMIT_MEMORY, "overcommit_memory", &sysctl_overcommit_memory,
 	 sizeof(sysctl_overcommit_memory), 0644, NULL, &proc_dointvec},
@@ -266,18 +273,37 @@ static ctl_table vm_table[] = {
 	{VM_PAGE_CLUSTER, "page-cluster", 
 	 &page_cluster, sizeof(int), 0644, NULL, &proc_dointvec},
 	{VM_DIRTY_BACKGROUND, "dirty_background_ratio",
-	&dirty_background_ratio, sizeof(dirty_background_ratio),
-	0644, NULL, &proc_dointvec},
+	 &dirty_background_ratio, sizeof(dirty_background_ratio),
+	 0644, NULL, &proc_dointvec_minmax,  &sysctl_intvec, NULL,
+	 &zero, &one_hundred },
 	{VM_DIRTY_ASYNC, "dirty_async_ratio", &dirty_async_ratio,
-	sizeof(dirty_async_ratio), 0644, NULL, &proc_dointvec},
+	 sizeof(dirty_async_ratio), 0644, NULL, &proc_dointvec_minmax,
+	 &sysctl_intvec, NULL, &zero, &one_hundred },
 	{VM_DIRTY_SYNC, "dirty_sync_ratio", &dirty_sync_ratio,
-	sizeof(dirty_sync_ratio), 0644, NULL, &proc_dointvec},
+	 sizeof(dirty_sync_ratio), 0644, NULL, &proc_dointvec_minmax,
+	 &sysctl_intvec, NULL, &zero, &one_hundred },
 	{VM_DIRTY_WB_CS, "dirty_writeback_centisecs",
-	&dirty_writeback_centisecs, sizeof(dirty_writeback_centisecs), 0644,
-	NULL, &proc_dointvec},
+	 &dirty_writeback_centisecs, sizeof(dirty_writeback_centisecs), 0644,
+	 NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL,
+	 /* Here, we define the range of possible values for
+	    dirty_writeback_centisecs.
+
+	    The default value is 5 seconds (500 centisec).  The minimum is 1
+	    centisec, the smallest possible value that could make any sort of
+	    sense.  If we allowed the user to set the interval to 0 seconds
+	    (which would presumably mean to chew up all of the CPU looking for
+	    dirty pages and writing them out, without taking a break), the
+	    interval would effectively become 1 second (100 centisecs), due to
+	    some nicely documented throttling code in wb_kupdate().
+
+	    There is no maximum legal value for dirty_writeback_centisecs. */
+	 &one , NULL},
 	{VM_DIRTY_EXPIRE_CS, "dirty_expire_centisecs",
-	&dirty_expire_centisecs, sizeof(dirty_expire_centisecs), 0644,
-	NULL, &proc_dointvec},
+	 &dirty_expire_centisecs, sizeof(dirty_expire_centisecs), 0644,
+	 NULL, &proc_dointvec},
+	{ VM_NR_PDFLUSH_THREADS, "nr_pdflush_threads",
+	  &nr_pdflush_threads, sizeof nr_pdflush_threads,
+	  0444 /* read-only*/, NULL, &proc_dointvec},
 	{0}
 };
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6d4555c3fb91..18a8af187ccb 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -47,6 +47,8 @@
 #define SYNC_WRITEBACK_PAGES	1500
 
 
+/* The following parameters are exported via /proc/sys/vm */
+
 /*
  * Dirty memory thresholds, in percentages
  */
@@ -67,15 +69,18 @@ int dirty_async_ratio = 50;
 int dirty_sync_ratio = 60;
 
 /*
- * The interval between `kupdate'-style writebacks.
+ * The interval between `kupdate'-style writebacks, in centiseconds
+ * (hundredths of a second)
  */
 int dirty_writeback_centisecs = 5 * 100;
 
 /*
- * The largest amount of time for which data is allowed to remain dirty
+ * The longest amount of time for which data is allowed to remain dirty
  */
 int dirty_expire_centisecs = 30 * 100;
 
+/* End of sysctl-exported parameters */
+
 
 static void background_writeout(unsigned long _min_pages);
 
@@ -233,7 +238,8 @@ static void wb_kupdate(unsigned long arg)
 static void wb_timer_fn(unsigned long unused)
 {
 	if (pdflush_operation(wb_kupdate, 0) < 0)
-		mod_timer(&wb_timer, jiffies + HZ);
+		mod_timer(&wb_timer, jiffies + HZ); /* delay 1 second */
+
 }
 
 static int __init wb_timer_init(void)
diff --git a/mm/pdflush.c b/mm/pdflush.c
index 16aa09b697fd..26fef9bc3235 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -15,6 +15,9 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/suspend.h>
+#include <linux/sched.h>	// Needed by writeback.h
+#include <linux/fs.h>		// Needed by writeback.h
+#include <linux/writeback.h>	// Prototypes pdflush_operation()
 
 
 /*
@@ -44,8 +47,11 @@ static spinlock_t pdflush_lock = SPIN_LOCK_UNLOCKED;
 /*
  * The count of currently-running pdflush threads.  Protected
  * by pdflush_lock.
+ *
+ * Readable by sysctl, but not writable.  Published to userspace at
+ * /proc/sys/vm/nr_pdflush_threads.
  */
-static int nr_pdflush_threads = 0;
+int nr_pdflush_threads = 0;
 
 /*
  * The time at which the pdflush thread pool last went empty
-- 
cgit v1.2.3


From f42e6ed8b8c71b3cdc1f0ed7c3f8d2f0c0ec6427 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@zip.com.au>
Date: Thu, 4 Jul 2002 08:30:49 -0700
Subject: [PATCH] add new list_splice_init()

A little cleanup: Most callers of list_splice() immediately
reinitialise the source list_head after calling list_splice().

So create a new list_splice_init() which does all that.
---
 drivers/block/ll_rw_blk.c        |  3 +--
 drivers/ieee1394/ieee1394_core.c |  3 +--
 fs/fs-writeback.c                | 14 +++++---------
 fs/jfs/jfs_txnmgr.c              |  5 +----
 fs/mpage.c                       | 14 +++++---------
 fs/nfs/write.c                   |  3 +--
 include/linux/list.h             | 36 ++++++++++++++++++++++++++++--------
 7 files changed, 42 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 318ff55529fb..bb57f0ab64a8 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -964,8 +964,7 @@ void blk_run_queues(void)
 		return;
 	}
 
-	list_splice(&blk_plug_list, &local_plug_list);
-	INIT_LIST_HEAD(&blk_plug_list);
+	list_splice_init(&blk_plug_list, &local_plug_list);
 	spin_unlock_irq(&blk_plug_lock);
 	
 	while (!list_empty(&local_plug_list)) {
diff --git a/drivers/ieee1394/ieee1394_core.c b/drivers/ieee1394/ieee1394_core.c
index aa64a17f71ab..f72a0b9641af 100644
--- a/drivers/ieee1394/ieee1394_core.c
+++ b/drivers/ieee1394/ieee1394_core.c
@@ -740,8 +740,7 @@ void abort_requests(struct hpsb_host *host)
         host->ops->devctl(host, CANCEL_REQUESTS, 0);
 
         spin_lock_irqsave(&host->pending_pkt_lock, flags);
-        list_splice(&host->pending_packets, &llist);
-        INIT_LIST_HEAD(&host->pending_packets);
+        list_splice_init(&host->pending_packets, &llist);
         spin_unlock_irqrestore(&host->pending_pkt_lock, flags);
 
         list_for_each(lh, &llist) {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index db84b9f8eec4..d5a3e1ffdb81 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -220,8 +220,7 @@ static void sync_sb_inodes(struct super_block *sb, int sync_mode,
 	struct list_head *head;
 	const unsigned long start = jiffies;	/* livelock avoidance */
 
-	list_splice(&sb->s_dirty, &sb->s_io);
-	INIT_LIST_HEAD(&sb->s_dirty);
+	list_splice_init(&sb->s_dirty, &sb->s_io);
 	head = &sb->s_io;
 	while ((tmp = head->prev) != head) {
 		struct inode *inode = list_entry(tmp, struct inode, i_list);
@@ -262,13 +261,10 @@ static void sync_sb_inodes(struct super_block *sb, int sync_mode,
 			break;
 	}
 out:
-	if (!list_empty(&sb->s_io)) {
-		/*
-		 * Put the rest back, in the correct order.
-		 */
-		list_splice(&sb->s_io, sb->s_dirty.prev);
-		INIT_LIST_HEAD(&sb->s_io);
-	}
+	/*
+	 * Put the rest back, in the correct order.
+	 */
+	list_splice_init(&sb->s_io, sb->s_dirty.prev);
 	return;
 }
 
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 83bdf2ee9586..5d152b888182 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2975,10 +2975,7 @@ int jfs_sync(void)
 			}
 		}
 		/* Add anon_list2 back to anon_list */
-		if (!list_empty(&TxAnchor.anon_list2)) {
-			list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
-			INIT_LIST_HEAD(&TxAnchor.anon_list2);
-		}
+		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
 		add_wait_queue(&jfs_sync_thread_wait, &wq);
 		set_current_state(TASK_INTERRUPTIBLE);
 		TXN_UNLOCK();
diff --git a/fs/mpage.c b/fs/mpage.c
index 187d75047979..5f9b62416492 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -490,8 +490,7 @@ mpage_writepages(struct address_space *mapping,
 
 	write_lock(&mapping->page_lock);
 
-	list_splice(&mapping->dirty_pages, &mapping->io_pages);
-	INIT_LIST_HEAD(&mapping->dirty_pages);
+	list_splice_init(&mapping->dirty_pages, &mapping->io_pages);
 
         while (!list_empty(&mapping->io_pages) && !done) {
 		struct page *page = list_entry(mapping->io_pages.prev,
@@ -538,13 +537,10 @@ mpage_writepages(struct address_space *mapping,
 		page_cache_release(page);
 		write_lock(&mapping->page_lock);
 	}
-	if (!list_empty(&mapping->io_pages)) {
-		/*
-		 * Put the rest back, in the correct order.
-		 */
-		list_splice(&mapping->io_pages, mapping->dirty_pages.prev);
-		INIT_LIST_HEAD(&mapping->io_pages);
-	}
+	/*
+	 * Put the rest back, in the correct order.
+	 */
+	list_splice_init(&mapping->io_pages, mapping->dirty_pages.prev);
 	write_unlock(&mapping->page_lock);
 	if (bio)
 		mpage_bio_submit(WRITE, bio);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c57c01653abf..26fb344a3939 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1110,8 +1110,7 @@ nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data)
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
 
-	list_splice(head, &data->pages);
-	INIT_LIST_HEAD(head);
+	list_splice_init(head, &data->pages);
 	first = nfs_list_entry(data->pages.next);
 	last = nfs_list_entry(data->pages.prev);
 	inode = first->wb_inode;
diff --git a/include/linux/list.h b/include/linux/list.h
index ac65cc310ab6..3e70aa03ac11 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -136,6 +136,19 @@ static inline int list_empty(list_t *head)
 	return head->next == head;
 }
 
+static inline void __list_splice(list_t *list, list_t *head)
+{
+	list_t *first = list->next;
+	list_t *last = list->prev;
+	list_t *at = head->next;
+
+	first->prev = head;
+	head->next = first;
+
+	last->next = at;
+	at->prev = last;
+}
+
 /**
  * list_splice - join two lists
  * @list: the new list to add.
@@ -145,15 +158,22 @@ static inline void list_splice(list_t *list, list_t *head)
 {
 	list_t *first = list->next;
 
-	if (first != list) {
-		list_t *last = list->prev;
-		list_t *at = head->next;
-
-		first->prev = head;
-		head->next = first;
+	if (first != list)
+		__list_splice(list, head);
+}
 
-		last->next = at;
-		at->prev = last;
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_init(list_t *list, list_t *head)
+{
+	if (!list_empty(list)) {
+		__list_splice(list, head);
+		INIT_LIST_HEAD(list);
 	}
 }
 
-- 
cgit v1.2.3


From b2bd3a26bbf30c69062d45de4622e7e5b412ad16 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@zip.com.au>
Date: Thu, 4 Jul 2002 08:30:54 -0700
Subject: [PATCH] set TASK_RUNNING in cond_resched()

do_select() does set_current_state(TASK_INTERRUPTIBLE) and then calls
__pollwait(), which calls __get_free_page().  The cond_resched() which
I added to the pagecache reclaim code then calls schedule() while the
task is still in state TASK_INTERRUPTIBLE, and never returns.

The patch makes cond_resched() more useful by setting current->state to
TASK_RUNNING before scheduling.
---
 include/linux/sched.h | 3 ++-
 kernel/ksyms.c        | 1 +
 kernel/sched.c        | 6 ++++++
 3 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 90fb402ee802..9b5af2cc11d2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -837,10 +837,11 @@ static inline int need_resched(void)
 	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
 }
 
+extern void __cond_resched(void);
 static inline void cond_resched(void)
 {
 	if (need_resched())
-		schedule();
+		__cond_resched();
 }
 
 /* Reevaluate whether the task has signals pending delivery.
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index d2d6fe4794cc..e88333d0c08b 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -473,6 +473,7 @@ EXPORT_SYMBOL(preempt_schedule);
 #endif
 EXPORT_SYMBOL(schedule_timeout);
 EXPORT_SYMBOL(sys_sched_yield);
+EXPORT_SYMBOL(__cond_resched);
 EXPORT_SYMBOL(set_user_nice);
 EXPORT_SYMBOL(task_nice);
 EXPORT_SYMBOL_GPL(idle_cpu);
diff --git a/kernel/sched.c b/kernel/sched.c
index da94159d6426..11ede675025f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1447,6 +1447,12 @@ asmlinkage long sys_sched_yield(void)
 	return 0;
 }
 
+void __cond_resched(void)
+{
+	set_current_state(TASK_RUNNING);
+	schedule();
+}
+
 asmlinkage long sys_sched_get_priority_max(int policy)
 {
 	int ret = -EINVAL;
-- 
cgit v1.2.3


From b5b6fa5267d4e0da8b2f46904cec4d4f11c45d2e Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@zip.com.au>
Date: Thu, 4 Jul 2002 08:31:00 -0700
Subject: [PATCH] set TASK_RUNNING in yield()

It seems that the yield() macro requires state TASK_RUNNING, but
practically none of the callers remember to set it.

The patch turns yield() into a real function which sets state
TASK_RUNNING before scheduling.
---
 drivers/net/e100/e100.h | 8 --------
 fs/buffer.c             | 1 -
 include/linux/sched.h   | 3 +--
 kernel/ksyms.c          | 2 +-
 kernel/sched.c          | 6 ++++++
 kernel/softirq.c        | 1 -
 kernel/suspend.c        | 3 +--
 mm/page_alloc.c         | 1 -
 8 files changed, 9 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/e100/e100.h b/drivers/net/e100/e100.h
index 107a7a84cf3a..9e6b5d3d4e26 100644
--- a/drivers/net/e100/e100.h
+++ b/drivers/net/e100/e100.h
@@ -1031,14 +1031,6 @@ extern unsigned char e100_selftest(struct e100_private *bdp, u32 *st_timeout,
 extern unsigned char e100_get_link_state(struct e100_private *bdp);
 extern unsigned char e100_wait_scb(struct e100_private *bdp);
 
-#ifndef yield
-#define yield()					\
-        do {					\
-                current->policy |= SCHED_YIELD;	\
-                schedule();			\
-        } while (0)                                     
-#endif
-
 extern void e100_deisolate_driver(struct e100_private *bdp,
 				  u8 recover, u8 full_reset);
 extern unsigned char e100_hw_reset_recover(struct e100_private *bdp,
diff --git a/fs/buffer.c b/fs/buffer.c
index 6ba5de4d2051..5558b4e7dde9 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -494,7 +494,6 @@ static void free_more_memory(void)
 	wakeup_bdflush();
 	try_to_free_pages(zone, GFP_NOFS, 0);
 	blk_run_queues();
-	__set_current_state(TASK_RUNNING);
 	yield();
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9b5af2cc11d2..29410b021b6c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -418,8 +418,7 @@ extern int task_prio(task_t *p);
 extern int task_nice(task_t *p);
 extern int idle_cpu(int cpu);
 
-asmlinkage long sys_sched_yield(void);
-#define yield() sys_sched_yield()
+void yield(void);
 
 /*
  * The default (Linux) execution domain.
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index e88333d0c08b..65bc2fcac173 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -472,7 +472,7 @@ EXPORT_SYMBOL(schedule);
 EXPORT_SYMBOL(preempt_schedule);
 #endif
 EXPORT_SYMBOL(schedule_timeout);
-EXPORT_SYMBOL(sys_sched_yield);
+EXPORT_SYMBOL(yield);
 EXPORT_SYMBOL(__cond_resched);
 EXPORT_SYMBOL(set_user_nice);
 EXPORT_SYMBOL(task_nice);
diff --git a/kernel/sched.c b/kernel/sched.c
index 11ede675025f..8dfe1941e7e0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1453,6 +1453,12 @@ void __cond_resched(void)
 	schedule();
 }
 
+void yield(void)
+{
+	set_current_state(TASK_RUNNING);
+	sys_sched_yield();
+}
+
 asmlinkage long sys_sched_get_priority_max(int policy)
 {
 	int ret = -EINVAL;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 3f233a3b3844..04f666d0e02b 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -262,7 +262,6 @@ void tasklet_kill(struct tasklet_struct *t)
 		printk("Attempt to kill tasklet from interrupt\n");
 
 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
-		current->state = TASK_RUNNING;
 		do
 			yield();
 		while (test_bit(TASKLET_STATE_SCHED, &t->state));
diff --git a/kernel/suspend.c b/kernel/suspend.c
index 12e5b0f01f57..f4f389c566b4 100644
--- a/kernel/suspend.c
+++ b/kernel/suspend.c
@@ -237,8 +237,7 @@ int freeze_processes(void)
 			todo++;
 		}
 		read_unlock(&tasklist_lock);
-		sys_sched_yield();
-		schedule();
+		yield();
 		if (time_after(jiffies, start_time + TIMEOUT)) {
 			PRINTK( "\n" );
 			printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cf914bd8180b..4d50db51b1b6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -441,7 +441,6 @@ nopage:
 		goto nopage;
 
 	/* Yield for kswapd, and try again */
-	__set_current_state(TASK_RUNNING);
 	yield();
 	goto rebalance;
 }
-- 
cgit v1.2.3


From 371151c9e3803d798991ae8b4c88b4bb232a68c7 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@zip.com.au>
Date: Thu, 4 Jul 2002 08:31:15 -0700
Subject: [PATCH] resurrect __GFP_HIGH

This patch reinstates __GFP_HIGH functionality.

__GFP_HIGH means "able to dip into the emergency pools".  However,
somewhere along the line this got broken.  __GFP_HIGH ceased to do
anything.  Instead, !__GFP_WAIT is used to tell the page allocator to
try harder.

__GFP_HIGH makes sense.  The concepts of "unable to sleep" and "should
try harder" are quite separate, and overloading !__GFP_WAIT to mean
"should access emergency pools" seems wrong.

This patch fixes a problem in mempool_alloc().  mempool_alloc() tries
the first allocation with __GFP_WAIT cleared.  If that fails, it tries
again with __GFP_WAIT enabled (if the caller can support __GFP_WAIT).
So it is currently performing an atomic allocation first, even though
the caller said that they're prepared to go in and call the page
stealer.

I thought this was a mempool bug, but Ingo said:

> no, it's not GFP_ATOMIC. The important difference is __GFP_HIGH, which
> triggers the intrusive highprio allocation mode. Otherwise gfp_nowait is
> just a nonblocking allocation of the same type as the original gfp_mask.
> ...
> what i've added is a bit more subtle allocation method, with both
> performance and balancing-correctness in mind:
>
> 1. allocate via gfp_mask, but nonblocking
> 2. if failure => try to get from the pool if the pool is 'full enough'.
> 3. if failure => allocate with gfp_mask [which might block]
>
> there is performance data that this method improves bounce-IO performance
> significantly, because even under VM pressure (when gfp_mask would block)
> we can still use up to 50% of the memory pool without blocking (and
> without endangering deadlock-free allocation). Ie. the memory pool is also
> a fast 'frontside cache' of memory elements.

Ingo was assuming that __GFP_HIGH was still functional.  It isn't, and the
mempool design wants it.
---
 drivers/scsi/scsi_merge.c |  4 +++-
 include/linux/gfp.h       | 10 +++++-----
 mm/page_alloc.c           |  5 +----
 mm/slab.c                 |  4 ++--
 mm/vmscan.c               |  3 +++
 5 files changed, 14 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/scsi_merge.c b/drivers/scsi/scsi_merge.c
index 82c3a17235bc..5bcf9138fcdb 100644
--- a/drivers/scsi/scsi_merge.c
+++ b/drivers/scsi/scsi_merge.c
@@ -74,8 +74,10 @@ int scsi_init_io(Scsi_Cmnd *SCpnt)
 	SCpnt->use_sg = count;
 
 	gfp_mask = GFP_NOIO;
-	if (in_interrupt())
+	if (in_interrupt()) {
 		gfp_mask &= ~__GFP_WAIT;
+		gfp_mask |= __GFP_HIGH;
+	}
 
 	/*
 	 * if sg table allocation fails, requeue request later.
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 63fac87d50ef..317d45415250 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -18,14 +18,14 @@
 #define __GFP_HIGHIO	0x80	/* Can start high mem physical IO? */
 #define __GFP_FS	0x100	/* Can call down to low-level FS? */
 
-#define GFP_NOHIGHIO	(__GFP_HIGH | __GFP_WAIT | __GFP_IO)
-#define GFP_NOIO	(__GFP_HIGH | __GFP_WAIT)
-#define GFP_NOFS	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO)
+#define GFP_NOHIGHIO	(             __GFP_WAIT | __GFP_IO)
+#define GFP_NOIO	(             __GFP_WAIT)
+#define GFP_NOFS	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO)
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_USER	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
 #define GFP_HIGHUSER	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM)
-#define GFP_KERNEL	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
-#define GFP_NFS		(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_KERNEL	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_NFS		(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
 #define GFP_KSWAPD	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
 
 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4d50db51b1b6..8266c5a21751 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -272,8 +272,6 @@ static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask
 	struct page * page = NULL;
 	int __freed = 0;
 
-	if (!(gfp_mask & __GFP_WAIT))
-		goto out;
 	if (in_interrupt())
 		BUG();
 
@@ -333,7 +331,6 @@ static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask
 		}
 		current->nr_local_pages = 0;
 	}
- out:
 	*freed = __freed;
 	return page;
 }
@@ -380,7 +377,7 @@ struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_
 			break;
 
 		local_min = z->pages_min;
-		if (!(gfp_mask & __GFP_WAIT))
+		if (gfp_mask & __GFP_HIGH)
 			local_min >>= 2;
 		min += local_min;
 		if (z->free_pages > min) {
diff --git a/mm/slab.c b/mm/slab.c
index 07d40987d310..5153b9469737 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1153,12 +1153,12 @@ static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
 	 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
 	 * will eventually be caught here (where it matters).
 	 */
-	if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
+	if (in_interrupt() && (flags & __GFP_WAIT))
 		BUG();
 
 	ctor_flags = SLAB_CTOR_CONSTRUCTOR;
 	local_flags = (flags & SLAB_LEVEL_MASK);
-	if (local_flags == SLAB_ATOMIC)
+	if (!(local_flags & __GFP_WAIT))
 		/*
 		 * Not allowed to sleep.  Need to tell a constructor about
 		 * this - it might need to know...
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6561f2b71b35..56757cc67ee5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -52,6 +52,9 @@ static inline int is_page_cache_freeable(struct page * page)
  * So PF_MEMALLOC is dropped here.  This causes the slab allocations to fail
  * earlier, so radix-tree nodes will then be allocated from the mempool
  * reserves.
+ *
+ * We're still using __GFP_HIGH for radix-tree node allocations, so some of
+ * the emergency pools are available - just not all of them.
  */
 static inline int
 swap_out_add_to_swap_cache(struct page *page, swp_entry_t entry)
-- 
cgit v1.2.3


From 193ae03649cdb7e0f0952314e3fac2c850b3d86a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@zip.com.au>
Date: Thu, 4 Jul 2002 08:31:30 -0700
Subject: [PATCH] suppress more allocation failure warnings

The `page allocation failure' warning in __alloc_pages() is being a
pain.  But I'm persisting with it...

The patch renames PF_RADIX_TREE to PF_NOWARN, and uses it in a few
places where allocation failures are known to happen.  These code
paths are well-tested now and suppressing the warning is OK.
---
 drivers/scsi/scsi.c   |  2 ++
 fs/bio.c              | 13 +++++++++----
 fs/buffer.c           |  2 ++
 include/linux/sched.h |  2 +-
 mm/page_alloc.c       |  2 +-
 mm/vmscan.c           |  2 +-
 6 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 0215c45cbd37..6b47f8966914 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -2481,7 +2481,9 @@ struct scatterlist *scsi_alloc_sgtable(Scsi_Cmnd *SCpnt, int gfp_mask)
 
 	sgp = scsi_sg_pools + SCpnt->sglist_len;
 
+	current->flags |= PF_NOWARN;
 	sgl = mempool_alloc(sgp->pool, gfp_mask);
+	current->flags &= ~PF_NOWARN;
 	if (sgl) {
 		memset(sgl, 0, sgp->size);
 		return sgl;
diff --git a/fs/bio.c b/fs/bio.c
index 5fdae32e35ae..277a8f730f7a 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -135,21 +135,26 @@ inline void bio_init(struct bio *bio)
  **/
 struct bio *bio_alloc(int gfp_mask, int nr_iovecs)
 {
-	struct bio *bio = mempool_alloc(bio_pool, gfp_mask);
+	struct bio *bio;
 	struct bio_vec *bvl = NULL;
 
+	current->flags |= PF_NOWARN;
+	bio = mempool_alloc(bio_pool, gfp_mask);
 	if (unlikely(!bio))
-		return NULL;
+		goto out;
 
 	if (!nr_iovecs || (bvl = bvec_alloc(gfp_mask,nr_iovecs,&bio->bi_max))) {
 		bio_init(bio);
 		bio->bi_destructor = bio_destructor;
 		bio->bi_io_vec = bvl;
-		return bio;
+		goto out;
 	}
 
 	mempool_free(bio, bio_pool);
-	return NULL;
+	bio = NULL;
+out:
+	current->flags &= ~PF_NOWARN;
+	return bio;
 }
 
 /**
diff --git a/fs/buffer.c b/fs/buffer.c
index 5558b4e7dde9..0d1f9e71f30c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -965,7 +965,9 @@ try_again:
 	head = NULL;
 	offset = PAGE_SIZE;
 	while ((offset -= size) >= 0) {
+		current->flags |= PF_NOWARN;
 		bh = alloc_buffer_head();
+		current->flags &= ~PF_NOWARN;
 		if (!bh)
 			goto no_grow;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 29410b021b6c..8f76bcdae1eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -386,7 +386,7 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
 #define PF_MEMDIE	0x00001000	/* Killed for out-of-memory */
 #define PF_FREE_PAGES	0x00002000	/* per process page freeing */
 #define PF_FLUSHER	0x00004000	/* responsible for disk writeback */
-#define PF_RADIX_TREE	0x00008000	/* debug: performing radix tree alloc */
+#define PF_NOWARN	0x00008000	/* debug: don't warn if alloc fails */
 
 #define PF_FREEZE	0x00010000	/* this task should be frozen for suspend */
 #define PF_IOTHREAD	0x00020000	/* this thread is needed for doing I/O to swap */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1957b56a1480..9dfd799f2193 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -399,7 +399,7 @@ rebalance:
 				return page;
 		}
 nopage:
-		if (!(current->flags & PF_RADIX_TREE)) {
+		if (!(current->flags & PF_NOWARN)) {
 			printk("%s: page allocation failure."
 				" order:%d, mode:0x%x\n",
 				current->comm, order, gfp_mask);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index da823781fc06..759fb8f7c435 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -63,7 +63,7 @@ swap_out_add_to_swap_cache(struct page *page, swp_entry_t entry)
 	int ret;
 
 	current->flags &= ~PF_MEMALLOC;
-	current->flags |= PF_RADIX_TREE;
+	current->flags |= PF_NOWARN;
 	ClearPageUptodate(page);		/* why? */
 	ClearPageReferenced(page);		/* why? */
 	ret = add_to_swap_cache(page, entry);
-- 
cgit v1.2.3


From 8b00e4fac18298dce49b78d25c5ea2399aeb6fa2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@zip.com.au>
Date: Thu, 4 Jul 2002 08:31:50 -0700
Subject: [PATCH] JBD commit callback capability

This is a patch which Stephen has applied to ext3's 2.4 repository.
Originally written by Andreas, generalised somewhat by Stephen.

Add jbd callback mechanism, requested for InterMezzo.  We allow the jbd's
client to request notification when a given handle's IO finally commits to
disk, so that clients can manage their own writeback state asynchronously.
---
 fs/jbd/checkpoint.c  |  3 ++-
 fs/jbd/commit.c      | 31 +++++++++++++++++++++----
 fs/jbd/journal.c     |  1 +
 fs/jbd/transaction.c | 65 ++++++++++++++++++++++++++++++++++++++++++----------
 include/linux/jbd.h  | 20 ++++++++++++++++
 5 files changed, 102 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 428dcd822dec..d640e23a1bf1 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -592,7 +592,8 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
 	J_ASSERT (transaction->t_log_list == NULL);
 	J_ASSERT (transaction->t_checkpoint_list == NULL);
 	J_ASSERT (transaction->t_updates == 0);
-	
+	J_ASSERT (list_empty(&transaction->t_jcb));
+
 	J_ASSERT (transaction->t_journal->j_committing_transaction !=
 					transaction);
 	
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 2283894a81a6..abbd16f1e822 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -471,7 +471,7 @@ start_journal_io:
            transaction's t_log_list queue, and metadata buffers are on
            the t_iobuf_list queue.
 
-	   Wait for the transactions in reverse order.  That way we are
+	   Wait for the buffers in reverse order.  That way we are
 	   less likely to be woken up until all IOs have completed, and
 	   so we incur less scheduling load.
 	*/
@@ -563,8 +563,10 @@ start_journal_io:
 
 	jbd_debug(3, "JBD: commit phase 6\n");
 
-	if (is_journal_aborted(journal))
+	if (is_journal_aborted(journal)) {
+		unlock_journal(journal);
 		goto skip_commit;
+	}
 
 	/* Done it all: now write the commit record.  We should have
 	 * cleaned up our previous buffers by now, so if we are in abort
@@ -574,9 +576,10 @@ start_journal_io:
 	descriptor = journal_get_descriptor_buffer(journal);
 	if (!descriptor) {
 		__journal_abort_hard(journal);
+		unlock_journal(journal);
 		goto skip_commit;
 	}
-	
+
 	/* AKPM: buglet - add `i' to tmp! */
 	for (i = 0; i < jh2bh(descriptor)->b_size; i += 512) {
 		journal_header_t *tmp =
@@ -596,14 +599,32 @@ start_journal_io:
 		__brelse(bh);		/* One for getblk() */
 		journal_unlock_journal_head(descriptor);
 	}
-	lock_journal(journal);
 
 	/* End of a transaction!  Finally, we can do checkpoint
            processing: any buffers committed as a result of this
            transaction can be removed from any checkpoint list it was on
            before. */
 
-skip_commit:
+skip_commit: /* The journal should be unlocked by now. */
+
+	/* Call any callbacks that had been registered for handles in this
+	 * transaction.  It is up to the callback to free any allocated
+	 * memory.
+	 */
+	if (!list_empty(&commit_transaction->t_jcb)) {
+		struct list_head *p, *n;
+		int error = is_journal_aborted(journal);
+
+		list_for_each_safe(p, n, &commit_transaction->t_jcb) {
+			struct journal_callback *jcb;
+
+			jcb = list_entry(p, struct journal_callback, jcb_list);
+			list_del(p);
+			jcb->jcb_func(jcb, error);
+		}
+	}
+
+	lock_journal(journal);
 
 	jbd_debug(3, "JBD: commit phase 7\n");
 
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index ade37ad43606..3d5c7d921e60 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -58,6 +58,7 @@ EXPORT_SYMBOL(journal_sync_buffer);
 #endif
 EXPORT_SYMBOL(journal_flush);
 EXPORT_SYMBOL(journal_revoke);
+EXPORT_SYMBOL(journal_callback_set);
 
 EXPORT_SYMBOL(journal_init_dev);
 EXPORT_SYMBOL(journal_init_inode);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 37c9ed30ebfd..860b2685ebd6 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -57,6 +57,7 @@ static transaction_t * get_transaction (journal_t * journal, int is_try)
 	transaction->t_state = T_RUNNING;
 	transaction->t_tid = journal->j_transaction_sequence++;
 	transaction->t_expires = jiffies + journal->j_commit_interval;
+	INIT_LIST_HEAD(&transaction->t_jcb);
 
 	/* Set up the commit timer for the new transaction. */
 	J_ASSERT (!journal->j_commit_timer_active);
@@ -90,7 +91,14 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
 	transaction_t *transaction;
 	int needed;
 	int nblocks = handle->h_buffer_credits;
-	
+
+	if (nblocks > journal->j_max_transaction_buffers) {
+		printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
+		       current->comm, nblocks,
+		       journal->j_max_transaction_buffers);
+		return -ENOSPC;
+	}
+
 	jbd_debug(3, "New handle %p going live.\n", handle);
 
 repeat:
@@ -200,6 +208,20 @@ repeat_locked:
 	return 0;
 }
 
+/* Allocate a new handle.  This should probably be in a slab... */
+static handle_t *new_handle(int nblocks)
+{
+	handle_t *handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
+	if (!handle)
+		return NULL;
+	memset(handle, 0, sizeof (handle_t));
+	handle->h_buffer_credits = nblocks;
+	handle->h_ref = 1;
+	INIT_LIST_HEAD(&handle->h_jcb);
+
+	return handle;
+}
+
 /*
  * Obtain a new handle.  
  *
@@ -226,14 +248,11 @@ handle_t *journal_start(journal_t *journal, int nblocks)
 		handle->h_ref++;
 		return handle;
 	}
-	
-	handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
+
+	handle = new_handle(nblocks);
 	if (!handle)
 		return ERR_PTR(-ENOMEM);
-	memset (handle, 0, sizeof (handle_t));
 
-	handle->h_buffer_credits = nblocks;
-	handle->h_ref = 1;
 	current->journal_info = handle;
 
 	err = start_this_handle(journal, handle);
@@ -332,14 +351,11 @@ handle_t *journal_try_start(journal_t *journal, int nblocks)
 	
 	if (is_journal_aborted(journal))
 		return ERR_PTR(-EIO);
-	
-	handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
+
+	handle = new_handle(nblocks);
 	if (!handle)
 		return ERR_PTR(-ENOMEM);
-	memset (handle, 0, sizeof (handle_t));
 
-	handle->h_buffer_credits = nblocks;
-	handle->h_ref = 1;
 	current->journal_info = handle;
 
 	err = try_start_this_handle(journal, handle);
@@ -1347,6 +1363,28 @@ out:
 }
 #endif
 
+/*
+ * Register a callback function for this handle.  Once the transaction
+ * that this handle is part of has been committed to disk, the function
+ * is called with the original callback data struct and the error status
+ * of the journal as parameters.  There is no guarantee of
+ * ordering between handles within a single transaction, nor between
+ * callbacks registered on the same handle.
+ *
+ * The caller is responsible for allocating the journal_callback struct.
+ * This is to allow the caller to add as much extra data to the callback
+ * as needed, but reduce the overhead of multiple allocations.  The caller
+ * allocated struct must start with a struct journal_callback at offset 0,
+ * and has the caller-specific data afterwards.
+ */
+void journal_callback_set(handle_t *handle,
+			  void (*func)(struct journal_callback *jcb, int error),
+			  struct journal_callback *jcb)
+{
+	list_add_tail(&jcb->jcb_list, &handle->h_jcb);
+	jcb->jcb_func = func;
+}
+
 /*
  * All done for a particular handle.
  *
@@ -1411,7 +1449,10 @@ int journal_stop(handle_t *handle)
 			wake_up(&journal->j_wait_transaction_locked);
 	}
 
-	/* 
+	/* Move callbacks from the handle to the transaction. */
+	list_splice(&handle->h_jcb, &transaction->t_jcb);
+
+	/*
 	 * If the handle is marked SYNC, we need to set another commit
 	 * going!  We also want to force a commit if the current
 	 * transaction is occupying too much of the log, or if the
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 683c1247fd70..fafb868eec11 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -250,6 +250,13 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh)
 	return bh->b_private;
 }
 
+#define HAVE_JOURNAL_CALLBACK_STATUS
+struct journal_callback {
+	struct list_head jcb_list;
+	void (*jcb_func)(struct journal_callback *jcb, int error);
+	/* user data goes here */
+};
+
 struct jbd_revoke_table_s;
 
 /* The handle_t type represents a single atomic update being performed
@@ -280,6 +287,12 @@ struct handle_s
 	   operations */
 	int			h_err;
 
+	/* List of application registered callbacks for this handle.
+	 * The function(s) will be called after the transaction that
+	 * this handle is part of has been committed to disk.
+	 */
+	struct list_head	h_jcb;
+
 	/* Flags */
 	unsigned int	h_sync:		1;	/* sync-on-close */
 	unsigned int	h_jdata:	1;	/* force data journaling */
@@ -399,6 +412,10 @@ struct transaction_s
 
 	/* How many handles used this transaction? */
 	int t_handle_count;
+
+	/* List of registered callback functions for this transaction.
+	 * Called when the transaction is committed. */
+	struct list_head	t_jcb;
 };
 
 
@@ -647,6 +664,9 @@ extern int	 journal_invalidatepage(journal_t *,
 extern int	 journal_try_to_free_buffers(journal_t *, struct page *, int);
 extern int	 journal_stop(handle_t *);
 extern int	 journal_flush (journal_t *);
+extern void	 journal_callback_set(handle_t *handle,
+				      void (*fn)(struct journal_callback *,int),
+				      struct journal_callback *jcb);
 
 extern void	 journal_lock_updates (journal_t *);
 extern void	 journal_unlock_updates (journal_t *);
-- 
cgit v1.2.3


From f1dfe022bbea7aa89e5215a984337d6559b6bcd2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@zip.com.au>
Date: Thu, 4 Jul 2002 08:32:11 -0700
Subject: [PATCH] Use names, not numbers for pagefault types

This is Bill Irwin's cleanup patch which gives symbolic names to the
fault types:

	#define VM_FAULT_OOM	(-1)
	#define VM_FAULT_SIGBUS	0
	#define VM_FAULT_MINOR	1
	#define VM_FAULT_MAJOR	2

Only arch/i386 has been updated - other architectures can do this too.
---
 arch/i386/mm/fault.c | 34 ++++++++++++++++------------
 include/linux/mm.h   | 10 ++++++++
 mm/memory.c          | 64 +++++++++++++++++++++++++++++-----------------------
 3 files changed, 66 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index fe6d634fc9c3..474009886b35 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -56,12 +56,16 @@ good_area:
 
 	for (;;) {
 	survive:
-		{
-			int fault = handle_mm_fault(current->mm, vma, start, 1);
-			if (!fault)
+		switch (handle_mm_fault(current->mm, vma, start, 1)) {
+			case VM_FAULT_SIGBUS:
 				goto bad_area;
-			if (fault < 0)
+			case VM_FAULT_OOM:
 				goto out_of_memory;
+			case VM_FAULT_MINOR:
+			case VM_FAULT_MAJOR:
+				break;
+			default:
+				BUG();
 		}
 		if (!size)
 			break;
@@ -239,16 +243,18 @@ good_area:
 	 * the fault.
 	 */
 	switch (handle_mm_fault(mm, vma, address, write)) {
-	case 1:
-		tsk->min_flt++;
-		break;
-	case 2:
-		tsk->maj_flt++;
-		break;
-	case 0:
-		goto do_sigbus;
-	default:
-		goto out_of_memory;
+		case VM_FAULT_MINOR:
+			tsk->min_flt++;
+			break;
+		case VM_FAULT_MAJOR:
+			tsk->maj_flt++;
+			break;
+		case VM_FAULT_SIGBUS:
+			goto do_sigbus;
+		case VM_FAULT_OOM:
+			goto out_of_memory;
+		default:
+			BUG();
 	}
 
 	/*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c93dfffc4760..163e19fd7b33 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -305,6 +305,16 @@ static inline void set_page_zone(struct page *page, unsigned long zone_num)
 #define NOPAGE_SIGBUS	(NULL)
 #define NOPAGE_OOM	((struct page *) (-1))
 
+/*
+ * Different kinds of faults, as returned by handle_mm_fault().
+ * Used to decide whether a process gets delivered SIGBUS or
+ * just gets major/minor fault counters bumped up.
+ */
+#define VM_FAULT_OOM	(-1)
+#define VM_FAULT_SIGBUS	0
+#define VM_FAULT_MINOR	1
+#define VM_FAULT_MAJOR	2
+
 /* The array of struct pages */
 extern struct page *mem_map;
 
diff --git a/mm/memory.c b/mm/memory.c
index 1232b7cd5a42..a9691d59728f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -503,18 +503,18 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long
 			while (!(map = follow_page(mm, start, write))) {
 				spin_unlock(&mm->page_table_lock);
 				switch (handle_mm_fault(mm, vma, start, write)) {
-				case 1:
+				case VM_FAULT_MINOR:
 					tsk->min_flt++;
 					break;
-				case 2:
+				case VM_FAULT_MAJOR:
 					tsk->maj_flt++;
 					break;
-				case 0:
-					if (i) return i;
-					return -EFAULT;
+				case VM_FAULT_SIGBUS:
+					return i ? i : -EFAULT;
+				case VM_FAULT_OOM:
+					return i ? i : -ENOMEM;
 				default:
-					if (i) return i;
-					return -ENOMEM;
+					BUG();
 				}
 				spin_lock(&mm->page_table_lock);
 			}
@@ -968,7 +968,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 			establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
 			pte_unmap(page_table);
 			spin_unlock(&mm->page_table_lock);
-			return 1;	/* Minor fault */
+			return VM_FAULT_MINOR;
 		}
 	}
 	pte_unmap(page_table);
@@ -1002,16 +1002,21 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	spin_unlock(&mm->page_table_lock);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
-	return 1;	/* Minor fault */
+	return VM_FAULT_MINOR;
 
 bad_wp_page:
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
 	printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n", address);
-	return -1;
+	/*
+	 * This should really halt the system so it can be debugged or
+	 * at least the kernel stops what it's doing before it corrupts
+	 * data, but for the moment just pretend this is OOM.
+	 */
+	return VM_FAULT_OOM;
 no_mem:
 	page_cache_release(old_page);
-	return -1;
+	return VM_FAULT_OOM;
 }
 
 static void vmtruncate_list(list_t *head, unsigned long pgoff)
@@ -1135,7 +1140,7 @@ static int do_swap_page(struct mm_struct * mm,
 	struct page *page;
 	swp_entry_t entry = pte_to_swp_entry(orig_pte);
 	pte_t pte;
-	int ret = 1;
+	int ret = VM_FAULT_MINOR;
 
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
@@ -1148,17 +1153,19 @@ static int do_swap_page(struct mm_struct * mm,
 			 * Back out if somebody else faulted in this pte while
 			 * we released the page table lock.
 			 */
-			int retval;
 			spin_lock(&mm->page_table_lock);
 			page_table = pte_offset_map(pmd, address);
-			retval = pte_same(*page_table, orig_pte) ? -1 : 1;
+			if (pte_same(*page_table, orig_pte))
+				ret = VM_FAULT_OOM;
+			else
+				ret = VM_FAULT_MINOR;
 			pte_unmap(page_table);
 			spin_unlock(&mm->page_table_lock);
-			return retval;
+			return ret;
 		}
 
 		/* Had to read the page from swap area: Major fault */
-		ret = 2;
+		ret = VM_FAULT_MAJOR;
 	}
 
 	lock_page(page);
@@ -1174,7 +1181,7 @@ static int do_swap_page(struct mm_struct * mm,
 		spin_unlock(&mm->page_table_lock);
 		unlock_page(page);
 		page_cache_release(page);
-		return 1;
+		return VM_FAULT_MINOR;
 	}
 
 	/* The page isn't present yet, go ahead with the fault. */
@@ -1232,7 +1239,7 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
 			pte_unmap(page_table);
 			page_cache_release(page);
 			spin_unlock(&mm->page_table_lock);
-			return 1;
+			return VM_FAULT_MINOR;
 		}
 		mm->rss++;
 		flush_page_to_ram(page);
@@ -1246,10 +1253,10 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, addr, entry);
 	spin_unlock(&mm->page_table_lock);
-	return 1;	/* Minor fault */
+	return VM_FAULT_MINOR;
 
 no_mem:
-	return -1;
+	return VM_FAULT_OOM;
 }
 
 /*
@@ -1277,10 +1284,11 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
 
 	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0);
 
-	if (new_page == NULL)	/* no page was available -- SIGBUS */
-		return 0;
+	/* no page was available -- either SIGBUS or OOM */
+	if (new_page == NOPAGE_SIGBUS)
+		return VM_FAULT_SIGBUS;
 	if (new_page == NOPAGE_OOM)
-		return -1;
+		return VM_FAULT_OOM;
 
 	/*
 	 * Should we do an early C-O-W break?
@@ -1289,7 +1297,7 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
 		struct page * page = alloc_page(GFP_HIGHUSER);
 		if (!page) {
 			page_cache_release(new_page);
-			return -1;
+			return VM_FAULT_OOM;
 		}
 		copy_user_highpage(page, new_page, address);
 		page_cache_release(new_page);
@@ -1325,13 +1333,13 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
 		pte_unmap(page_table);
 		page_cache_release(new_page);
 		spin_unlock(&mm->page_table_lock);
-		return 1;
+		return VM_FAULT_MINOR;
 	}
 
 	/* no need to invalidate: a not-present page shouldn't be cached */
 	update_mmu_cache(vma, address, entry);
 	spin_unlock(&mm->page_table_lock);
-	return 2;	/* Major fault */
+	return VM_FAULT_MAJOR;
 }
 
 /*
@@ -1383,7 +1391,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	establish_pte(vma, address, pte, entry);
 	pte_unmap(pte);
 	spin_unlock(&mm->page_table_lock);
-	return 1;
+	return VM_FAULT_MINOR;
 }
 
 /*
@@ -1411,7 +1419,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 			return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
 	}
 	spin_unlock(&mm->page_table_lock);
-	return -1;
+	return VM_FAULT_OOM;
 }
 
 /*
-- 
cgit v1.2.3


From 2aa859378756ae63d41b2cafb8bcdb9246afeb95 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Thu, 4 Jul 2002 08:53:12 -0700
Subject: [PATCH] devpts cleanup

	* devpts "upcalls" eliminated.
	* instead of playing games with revalidation we simply use a
ramfs-style tree and kill dentries upon devpts_pty_kill().  That
allows us to get rid of a lot of code in fs/devpts/*.c.
	* devpts_fs.h cleaned up.
	* devpts/root.c and devpts/devpts_i.h removed.
	* array of pointers to devpts inodes killed; with a ramfs-style
tree it's not needed anymore.
	* devpts/inode.c cleaned up.
	* devpts_pty_new() used to get mk_kdev() only to convert it to
dev_t (hardly a surprise, since it's mknod() in disguise).  Now it gets
dev_t as an argument.
---
 drivers/char/pty.c        |   2 -
 drivers/char/tty_io.c     |   2 +-
 fs/Makefile               |   2 +-
 fs/devpts/Makefile        |   2 +-
 fs/devpts/devpts_i.h      |  41 ---------
 fs/devpts/inode.c         | 216 ++++++++++++++++------------------------------
 fs/devpts/root.c          | 142 ------------------------------
 include/linux/devpts_fs.h |  49 +----------
 8 files changed, 80 insertions(+), 376 deletions(-)
 delete mode 100644 fs/devpts/devpts_i.h
 delete mode 100644 fs/devpts/root.c

(limited to 'include/linux')

diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index fc6d0fe36b66..e9acfc1a5ae1 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -25,8 +25,6 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/bitops.h>
-
-#define BUILDING_PTY_C 1
 #include <linux/devpts_fs.h>
 
 struct pty_struct {
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 4a2bd436f8e4..1c7b4b01d3a4 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1336,7 +1336,7 @@ retry_open:
 	ptmx_found:
 		set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
 		minor -= driver->minor_start;
-		devpts_pty_new(driver->other->name_base + minor, mk_kdev(driver->other->major, minor + driver->other->minor_start));
+		devpts_pty_new(driver->other->name_base + minor, MKDEV(driver->other->major, minor + driver->other->minor_start));
 		tty_register_devfs(&pts_driver[major], DEVFS_FL_DEFAULT,
 				   pts_driver[major].minor_start + minor);
 		noctty = 1;
diff --git a/fs/Makefile b/fs/Makefile
index 28fe3eb901db..2f516179753c 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_QUOTACTL)		+= quota.o
 obj-$(CONFIG_PROC_FS)		+= proc/
 obj-y				+= partitions/
 obj-y				+= driverfs/
+obj-y				+= devpts/
 
 # Do not add any filesystems before this line
 obj-$(CONFIG_EXT3_FS)		+= ext3/ # Before ext2 so root fs can be ext3
@@ -81,7 +82,6 @@ obj-$(CONFIG_AUTOFS_FS)		+= autofs/
 obj-$(CONFIG_AUTOFS4_FS)	+= autofs4/
 obj-$(CONFIG_ADFS_FS)		+= adfs/
 obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
-obj-$(CONFIG_DEVPTS_FS)		+= devpts/
 obj-$(CONFIG_SUN_OPENPROMFS)	+= openpromfs/
 obj-$(CONFIG_JFS_FS)		+= jfs/
 
diff --git a/fs/devpts/Makefile b/fs/devpts/Makefile
index 59aba3c15f50..f80cfe5bd8cd 100644
--- a/fs/devpts/Makefile
+++ b/fs/devpts/Makefile
@@ -4,6 +4,6 @@
 
 obj-$(CONFIG_DEVPTS_FS) += devpts.o
 
-devpts-objs := root.o inode.o
+devpts-objs := inode.o
 
 include $(TOPDIR)/Rules.make
diff --git a/fs/devpts/devpts_i.h b/fs/devpts/devpts_i.h
deleted file mode 100644
index da387ea1a932..000000000000
--- a/fs/devpts/devpts_i.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/devpts/devpts_i.h
- *
- *  Copyright 1998 H. Peter Anvin -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/fs.h>
-#include <linux/tty.h>
-#include <linux/types.h>
-
-#define BUILDING_DEVPTS 1
-#include <linux/devpts_fs.h>
-
-struct devpts_sb_info {
-	u32 magic;
-	int setuid;
-	int setgid;
-	uid_t   uid;
-	gid_t   gid;
-	umode_t mode;
-
-	unsigned int max_ptys;
-	struct inode **inodes;
-};
-
-#define DEVPTS_SUPER_MAGIC 0x1cd1
-#define DEVPTS_SBI_MAGIC   0x01da1d02
-
-extern inline struct devpts_sb_info *SBI(struct super_block *sb)
-{
-	return (struct devpts_sb_info *)(sb->u.generic_sbp);
-}
-
-extern struct inode_operations devpts_root_inode_operations;
-extern struct file_operations devpts_root_operations;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 0727e719279d..973c288fe28a 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -11,135 +11,70 @@
  * ------------------------------------------------------------------------- */
 
 #include <linux/module.h>
-
-#include <linux/string.h>
-#include <linux/fs.h>
 #include <linux/init.h>
-#include <linux/kdev_t.h>
-#include <linux/kernel.h>
-#include <linux/major.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/tty.h>
-#include <asm/bitops.h>
-#include <asm/uaccess.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
 
-#include "devpts_i.h"
+#define DEVPTS_SUPER_MAGIC 0x1cd1
 
 static struct vfsmount *devpts_mnt;
+static struct dentry *devpts_root;
 
-static void devpts_put_super(struct super_block *sb)
-{
-	struct devpts_sb_info *sbi = SBI(sb);
-	struct inode *inode;
-	int i;
+static struct {
+	int setuid;
+	int setgid;
+	uid_t   uid;
+	gid_t   gid;
+	umode_t mode;
+} config = {mode: 0600};
 
-	for ( i = 0 ; i < sbi->max_ptys ; i++ ) {
-		if ( (inode = sbi->inodes[i]) ) {
-			if ( atomic_read(&inode->i_count) != 1 )
-				printk("devpts_put_super: badness: entry %d count %d\n",
-				       i, atomic_read(&inode->i_count));
-			inode->i_nlink--;
-			iput(inode);
-		}
-	}
-	kfree(sbi->inodes);
-	kfree(sbi);
-}
-
-static int devpts_remount (struct super_block * sb, int * flags, char * data);
-
-static struct super_operations devpts_sops = {
-	put_super:	devpts_put_super,
-	statfs:		simple_statfs,
-	remount_fs:	devpts_remount,
-};
-
-static int devpts_parse_options(char *options, struct devpts_sb_info *sbi)
+static int devpts_remount(struct super_block *sb, int *flags, char *data)
 {
 	int setuid = 0;
 	int setgid = 0;
 	uid_t uid = 0;
 	gid_t gid = 0;
 	umode_t mode = 0600;
-	char *this_char, *value;
+	char *this_char;
 
 	this_char = NULL;
-	while ((this_char = strsep(&options, ",")) != NULL) {
+	while ((this_char = strsep(&data, ",")) != NULL) {
+		int n;
+		char dummy;
 		if (!*this_char)
 			continue;
-		if ((value = strchr(this_char,'=')) != NULL)
-			*value++ = 0;
-		if (!strcmp(this_char,"uid")) {
-			if (!value || !*value)
-				return 1;
-			uid = simple_strtoul(value,&value,0);
-			if (*value)
-				return 1;
+		if (sscanf(this_char, "uid=%i%c", &n, &dummy) == 1) {
 			setuid = 1;
-		}
-		else if (!strcmp(this_char,"gid")) {
-			if (!value || !*value)
-				return 1;
-			gid = simple_strtoul(value,&value,0);
-			if (*value)
-				return 1;
+			uid = n;
+		} else if (sscanf(this_char, "gid=%i%c", &n, &dummy) == 1) {
 			setgid = 1;
+			gid = n;
+		} else if (sscanf(this_char, "mode=%o%c", &n, &dummy) == 1)
+			mode = n & ~S_IFMT;
+		else {
+			printk("devpts: called with bogus options\n");
+			return -EINVAL;
 		}
-		else if (!strcmp(this_char,"mode")) {
-			if (!value || !*value)
-				return 1;
-			mode = simple_strtoul(value,&value,8);
-			if (*value)
-				return 1;
-		}
-		else
-			return 1;
 	}
-	sbi->setuid  = setuid;
-	sbi->setgid  = setgid;
-	sbi->uid     = uid;
-	sbi->gid     = gid;
-	sbi->mode    = mode & ~S_IFMT;
+	config.setuid  = setuid;
+	config.setgid  = setgid;
+	config.uid     = uid;
+	config.gid     = gid;
+	config.mode    = mode;
 
 	return 0;
 }
 
-static int devpts_remount(struct super_block * sb, int * flags, char * data)
-{
-	struct devpts_sb_info *sbi = sb->u.generic_sbp;
-	int res = devpts_parse_options(data,sbi);
-	if (res) {
-		printk("devpts: called with bogus options\n");
-		return -EINVAL;
-	}
-	return 0;
-}
+static struct super_operations devpts_sops = {
+	statfs:		simple_statfs,
+	remount_fs:	devpts_remount,
+};
 
 static int devpts_fill_super(struct super_block *s, void *data, int silent)
 {
-	int error = -ENOMEM;
 	struct inode * inode;
-	struct devpts_sb_info *sbi;
-
-	sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
-	if ( !sbi )
-		goto fail;
-	memset(sbi, 0, sizeof(*sbi));
-
-	sbi->magic  = DEVPTS_SBI_MAGIC;
-	sbi->max_ptys = unix98_max_ptys;
-	sbi->inodes = kmalloc(sizeof(struct inode *) * sbi->max_ptys, GFP_KERNEL);
-	if ( !sbi->inodes )
-		goto fail_free;
-	memset(sbi->inodes, 0, sizeof(struct inode *) * sbi->max_ptys);
 
-	if ( devpts_parse_options(data,sbi) && !silent) {
-		error = -EINVAL;
-		printk("devpts: called with bogus options\n");
-		goto fail_free;
-	}
-	s->u.generic_sbp = (void *) sbi;
 	s->s_blocksize = 1024;
 	s->s_blocksize_bits = 10;
 	s->s_magic = DEVPTS_SUPER_MAGIC;
@@ -147,27 +82,25 @@ static int devpts_fill_super(struct super_block *s, void *data, int silent)
 
 	inode = new_inode(s);
 	if (!inode)
-		goto fail_free;
+		goto fail;
 	inode->i_ino = 1;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	inode->i_blocks = 0;
 	inode->i_blksize = 1024;
 	inode->i_uid = inode->i_gid = 0;
 	inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
-	inode->i_op = &devpts_root_inode_operations;
-	inode->i_fop = &devpts_root_operations;
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
 	inode->i_nlink = 2;
 
-	s->s_root = d_alloc_root(inode);
+	devpts_root = s->s_root = d_alloc_root(inode);
 	if (s->s_root)
 		return 0;
 	
 	printk("devpts: get root dentry failed\n");
 	iput(inode);
-fail_free:
-	kfree(sbi);
 fail:
-	return error;
+	return -ENOMEM;
 }
 
 static struct super_block *devpts_get_sb(struct file_system_type *fs_type,
@@ -183,44 +116,52 @@ static struct file_system_type devpts_fs_type = {
 	kill_sb:	kill_anon_super,
 };
 
-void devpts_pty_new(int number, kdev_t device)
+/*
+ * The normal naming convention is simply /dev/pts/<number>; this conforms
+ * to the System V naming convention
+ */
+
+static struct dentry *get_node(int num)
 {
-	struct super_block *sb = devpts_mnt->mnt_sb;
-	struct devpts_sb_info *sbi = SBI(sb);
-	struct inode *inode;
-		
-	if ( sbi->inodes[number] )
-		return; /* Already registered, this does happen */
-		
-	inode = new_inode(sb);
+	char s[10];
+	struct dentry *root = devpts_root;
+	down(&root->d_inode->i_sem);
+	return lookup_one_len(s, root, sprintf(s, "%d", num));
+}
+
+void devpts_pty_new(int number, dev_t device)
+{
+	struct dentry *dentry;
+	struct inode *inode = new_inode(devpts_mnt->mnt_sb);
 	if (!inode)
 		return;
 	inode->i_ino = number+2;
-	inode->i_blocks = 0;
 	inode->i_blksize = 1024;
-	inode->i_uid = sbi->setuid ? sbi->uid : current->fsuid;
-	inode->i_gid = sbi->setgid ? sbi->gid : current->fsgid;
+	inode->i_uid = config.setuid ? config.uid : current->fsuid;
+	inode->i_gid = config.setgid ? config.gid : current->fsgid;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-	init_special_inode(inode, S_IFCHR|sbi->mode, kdev_t_to_nr(device));
+	init_special_inode(inode, S_IFCHR|config.mode, device);
 
-	if ( sbi->inodes[number] ) {
-		iput(inode);
-		return;
-	}
-	sbi->inodes[number] = inode;
+	dentry = get_node(number);
+	if (!IS_ERR(dentry) && !dentry->d_inode)
+		d_instantiate(dentry, inode);
+	up(&devpts_root->d_inode->i_sem);
 }
 
 void devpts_pty_kill(int number)
 {
-	struct super_block *sb = devpts_mnt->mnt_sb;
-	struct devpts_sb_info *sbi = SBI(sb);
-	struct inode *inode = sbi->inodes[number];
+	struct dentry *dentry = get_node(number);
 
-	if ( inode ) {
-		sbi->inodes[number] = NULL;
-		inode->i_nlink--;
-		iput(inode);
+	if (!IS_ERR(dentry)) {
+		struct inode *inode = dentry->d_inode;
+		if (inode) {
+			inode->i_nlink--;
+			d_delete(dentry);
+			dput(dentry);
+		}
+		dput(dentry);
 	}
+	up(&devpts_root->d_inode->i_sem);
 }
 
 static int __init init_devpts_fs(void)
@@ -231,22 +172,12 @@ static int __init init_devpts_fs(void)
 		err = PTR_ERR(devpts_mnt);
 		if (!IS_ERR(devpts_mnt))
 			err = 0;
-#ifdef MODULE
-		if ( !err ) {
-			devpts_upcall_new  = devpts_pty_new;
-			devpts_upcall_kill = devpts_pty_kill;
-		}
-#endif
 	}
 	return err;
 }
 
 static void __exit exit_devpts_fs(void)
 {
-#ifdef MODULE
-	devpts_upcall_new  = NULL;
-	devpts_upcall_kill = NULL;
-#endif
 	unregister_filesystem(&devpts_fs_type);
 	kern_umount(devpts_mnt);
 }
@@ -254,4 +185,3 @@ static void __exit exit_devpts_fs(void)
 module_init(init_devpts_fs)
 module_exit(exit_devpts_fs)
 MODULE_LICENSE("GPL");
-
diff --git a/fs/devpts/root.c b/fs/devpts/root.c
deleted file mode 100644
index 44e7bd7e3baa..000000000000
--- a/fs/devpts/root.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/devpts/root.c
- *
- *  Copyright 1998 H. Peter Anvin -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/errno.h>
-#include <linux/stat.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/smp_lock.h>
-#include "devpts_i.h"
-
-static int devpts_root_readdir(struct file *,void *,filldir_t);
-static struct dentry *devpts_root_lookup(struct inode *,struct dentry *);
-static int devpts_revalidate(struct dentry *, int);
-
-struct file_operations devpts_root_operations = {
-	read:		generic_read_dir,
-	readdir:	devpts_root_readdir,
-};
-
-struct inode_operations devpts_root_inode_operations = {
-	lookup:		devpts_root_lookup,
-};
-
-static struct dentry_operations devpts_dentry_operations = {
-	d_revalidate:	devpts_revalidate,
-};
-
-/*
- * The normal naming convention is simply /dev/pts/<number>; this conforms
- * to the System V naming convention
- */
-
-#define genptsname(buf,num) sprintf(buf, "%d", num)
-
-static int devpts_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
-	struct inode * inode = filp->f_dentry->d_inode;
-	struct devpts_sb_info * sbi = SBI(filp->f_dentry->d_inode->i_sb);
-	off_t nr;
-	char numbuf[16];
-
-	lock_kernel();
-
-	nr = filp->f_pos;
-
-	switch(nr)
-	{
-	case 0:
-		if (filldir(dirent, ".", 1, nr, inode->i_ino, DT_DIR) < 0)
-			goto out;
-		filp->f_pos = ++nr;
-		/* fall through */
-	case 1:
-		if (filldir(dirent, "..", 2, nr, inode->i_ino, DT_DIR) < 0)
-			goto out;
-		filp->f_pos = ++nr;
-		/* fall through */
-	default:
-		while ( nr - 2 < sbi->max_ptys ) {
-			int ptynr = nr - 2;
-			if ( sbi->inodes[ptynr] ) {
-				genptsname(numbuf, ptynr);
-				if ( filldir(dirent, numbuf, strlen(numbuf), nr, nr, DT_CHR) < 0 )
-					goto out;
-			}
-			filp->f_pos = ++nr;
-		}
-		break;
-	}
-
-out:
-	unlock_kernel();
-	return 0;
-}
-
-/*
- * Revalidate is called on every cache lookup.  We use it to check that
- * the pty really does still exist.  Never revalidate negative dentries;
- * for simplicity (fix later?)
- */
-static int devpts_revalidate(struct dentry * dentry, int flags)
-{
-	struct devpts_sb_info *sbi;
-
-	if ( !dentry->d_inode )
-		return 0;
-
-	sbi = SBI(dentry->d_inode->i_sb);
-
-	return ( sbi->inodes[dentry->d_inode->i_ino - 2] == dentry->d_inode );
-}
-
-static struct dentry *devpts_root_lookup(struct inode * dir, struct dentry * dentry)
-{
-	struct devpts_sb_info *sbi = SBI(dir->i_sb);
-	unsigned int entry;
-	int i;
-	const char *p;
-
-	dentry->d_op    = &devpts_dentry_operations;
-
-	if ( dentry->d_name.len == 1 && dentry->d_name.name[0] == '0' ) {
-		entry = 0;
-	} else if ( dentry->d_name.len < 1 ) {
-		return NULL;
-	} else {
-		p = dentry->d_name.name;
-		if ( *p < '1' || *p > '9' )
-			return NULL;
-		entry = *p++ - '0';
-
-		for ( i = dentry->d_name.len-1 ; i ; i-- ) {
-			unsigned int nentry = *p++ - '0';
-			if ( nentry > 9 )
-				return NULL;
-			if ( entry >= ~0U/10 )
-				return NULL;
-			entry = nentry + entry * 10;
-		}
-	}
-
-	if ( entry >= sbi->max_ptys )
-		return NULL;
-
-	lock_kernel();
-	if ( sbi->inodes[entry] )
-		atomic_inc(&sbi->inodes[entry]->i_count);
-	
-	d_add(dentry, sbi->inodes[entry]);
-	unlock_kernel();
-
-	return NULL;
-}
diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
index 38164e6b2bbf..4def3512b355 100644
--- a/include/linux/devpts_fs.h
+++ b/include/linux/devpts_fs.h
@@ -10,64 +10,23 @@
  *
  * ------------------------------------------------------------------------- */
 
-/*
- * Prototypes for the pty driver <-> devpts filesystem interface.  Most
- * of this is really just a hack so we can exclude it or build it as a
- * module, and probably should go away eventually.
- */
-
 #ifndef _LINUX_DEVPTS_FS_H
 #define _LINUX_DEVPTS_FS_H 1
 
-#include <linux/config.h>
-#include <linux/kdev_t.h>
-#include <linux/tty.h>
-
 #ifdef CONFIG_DEVPTS_FS
 
-void devpts_pty_new(int, kdev_t);
-void devpts_pty_kill(int);
-#define unix98_max_ptys               NR_PTYS * UNIX98_NR_MAJORS;
-
-#elif defined(CONFIG_DEVPTS_FS_MODULE)
+void devpts_pty_new(int, dev_t);	/* mknod in devpts */
+void devpts_pty_kill(int);		/* unlink */
 
-#ifdef BUILDING_PTY_C
-void (*devpts_upcall_new)(int,kdev_t) = NULL;
-void (*devpts_upcall_kill)(int)       = NULL;
-unsigned int unix98_max_ptys          = NR_PTYS * UNIX98_NR_MAJORS;
-
-EXPORT_SYMBOL(devpts_upcall_new);
-EXPORT_SYMBOL(devpts_upcall_kill);
-EXPORT_SYMBOL(unix98_max_ptys);
 #else
-extern void (*devpts_upcall_new)(int,kdev_t);
-extern void (*devpts_upcall_kill)(int);
-extern unsigned int unix98_max_ptys;
-#endif
 
-#ifndef BUILDING_DEVPTS
-static inline void
-devpts_pty_new(int line, kdev_t device)
+static inline void devpts_pty_new(int line, dev_t device)
 {
-	if ( devpts_upcall_new )
-		return devpts_upcall_new(line,device);
 }
 
-static inline void
-devpts_pty_kill(int line)
+static inline void devpts_pty_kill(int line)
 {
-	if ( devpts_upcall_kill )
-		return devpts_upcall_kill(line);
 }
-#endif
-
-#else  /* No /dev/pts filesystem at all */
-
-static inline void
-devpts_pty_new(int line, kdev_t device) { }
-
-static inline void
-devpts_pty_kill(int line) { }
 
 #endif
 
-- 
cgit v1.2.3


From 67addbac9d8cc2f9c21711fe4902a165f1783ae8 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Thu, 4 Jul 2002 08:53:22 -0700
Subject: [PATCH] cdrom.c cleanups

	* A bunch of functions in cdrom.c used to take a kdev_t and use it
only to do cdrom_find_device(dev), even though their callers already
had the struct cdrom_device_info * in question.  Switched to passing
said pointer directly.
	* useless exports removed; stuff not used outside of cdrom.c
made static.
---
 drivers/cdrom/cdrom.c | 50 ++++++++++++++++++++------------------------------
 drivers/ide/ide-cd.c  |  8 +++-----
 drivers/scsi/sr.c     |  4 ++--
 include/linux/cdrom.h | 10 +---------
 4 files changed, 26 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index fc2fa884e1ed..2c13712d90c5 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -318,8 +318,9 @@ static void sanitize_format(union cdrom_addr *addr,
 static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
 		     unsigned long arg);
 
-int cdrom_get_last_written(kdev_t dev, long *last_written);
-int cdrom_get_next_writable(kdev_t dev, long *next_writable);
+int cdrom_get_last_written(struct cdrom_device_info *, long *);
+static int cdrom_get_next_writable(struct cdrom_device_info *, long *);
+static void cdrom_count_tracks(struct cdrom_device_info *, tracktype*);
 
 #ifdef CONFIG_SYSCTL
 static void cdrom_sysctl_register(void);
@@ -436,7 +437,7 @@ int unregister_cdrom(struct cdrom_device_info *unreg)
 	return 0;
 }
 
-struct cdrom_device_info *cdrom_find_device(kdev_t dev)
+static struct cdrom_device_info *cdrom_find_device(kdev_t dev)
 {
 	struct cdrom_device_info *cdi;
 
@@ -775,7 +776,7 @@ static int cdrom_load_unload(struct cdrom_device_info *cdi, int slot)
 	return cdi->ops->generic_packet(cdi, &cgc);
 }
 
-int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
+static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
 {
 	struct cdrom_changer_info info;
 	int curslot;
@@ -859,7 +860,7 @@ int cdrom_media_changed(kdev_t dev)
 }
 
 /* badly broken, I know. Is due for a fixup anytime. */
-void cdrom_count_tracks(struct cdrom_device_info *cdi, tracktype* tracks)
+static void cdrom_count_tracks(struct cdrom_device_info *cdi, tracktype* tracks)
 {
 	struct cdrom_tochdr header;
 	struct cdrom_tocentry entry;
@@ -1921,7 +1922,6 @@ static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
 {		
 	struct cdrom_device_ops *cdo = cdi->ops;
 	struct cdrom_generic_command cgc;
-	kdev_t dev = cdi->dev;
 	char buffer[32];
 	int ret = 0;
 
@@ -2193,7 +2193,7 @@ static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
 	case CDROM_NEXT_WRITABLE: {
 		long next = 0;
 		cdinfo(CD_DO_IOCTL, "entering CDROM_NEXT_WRITABLE\n"); 
-		if ((ret = cdrom_get_next_writable(dev, &next)))
+		if ((ret = cdrom_get_next_writable(cdi, &next)))
 			return ret;
 		IOCTL_OUT(arg, long, next);
 		return 0;
@@ -2201,7 +2201,7 @@ static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
 	case CDROM_LAST_WRITTEN: {
 		long last = 0;
 		cdinfo(CD_DO_IOCTL, "entering CDROM_LAST_WRITTEN\n"); 
-		if ((ret = cdrom_get_last_written(dev, &last)))
+		if ((ret = cdrom_get_last_written(cdi, &last)))
 			return ret;
 		IOCTL_OUT(arg, long, last);
 		return 0;
@@ -2211,10 +2211,9 @@ static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
 	return -ENOTTY;
 }
 
-int cdrom_get_track_info(kdev_t dev, __u16 track, __u8 type,
+static int cdrom_get_track_info(struct cdrom_device_info *cdi, __u16 track, __u8 type,
 			 track_information *ti)
 {
-        struct cdrom_device_info *cdi = cdrom_find_device(dev);
 	struct cdrom_device_ops *cdo = cdi->ops;
 	struct cdrom_generic_command cgc;
 	int ret;
@@ -2241,9 +2240,8 @@ int cdrom_get_track_info(kdev_t dev, __u16 track, __u8 type,
 }
 
 /* requires CD R/RW */
-int cdrom_get_disc_info(kdev_t dev, disc_information *di)
+static int cdrom_get_disc_info(struct cdrom_device_info *cdi, disc_information *di)
 {
-	struct cdrom_device_info *cdi = cdrom_find_device(dev);
 	struct cdrom_device_ops *cdo = cdi->ops;
 	struct cdrom_generic_command cgc;
 	int ret;
@@ -2273,9 +2271,8 @@ int cdrom_get_disc_info(kdev_t dev, disc_information *di)
 
 /* return the last written block on the CD-R media. this is for the udf
    file system. */
-int cdrom_get_last_written(kdev_t dev, long *last_written)
-{	
-	struct cdrom_device_info *cdi = cdrom_find_device(dev);
+int cdrom_get_last_written(struct cdrom_device_info *cdi, long *last_written)
+{
 	struct cdrom_tocentry toc;
 	disc_information di;
 	track_information ti;
@@ -2285,17 +2282,17 @@ int cdrom_get_last_written(kdev_t dev, long *last_written)
 	if (!CDROM_CAN(CDC_GENERIC_PACKET))
 		goto use_toc;
 
-	if ((ret = cdrom_get_disc_info(dev, &di)))
+	if ((ret = cdrom_get_disc_info(cdi, &di)))
 		goto use_toc;
 
 	last_track = (di.last_track_msb << 8) | di.last_track_lsb;
-	if ((ret = cdrom_get_track_info(dev, last_track, 1, &ti)))
+	if ((ret = cdrom_get_track_info(cdi, last_track, 1, &ti)))
 		goto use_toc;
 
 	/* if this track is blank, try the previous. */
 	if (ti.blank) {
 		last_track--;
-		if ((ret = cdrom_get_track_info(dev, last_track, 1, &ti)))
+		if ((ret = cdrom_get_track_info(cdi, last_track, 1, &ti)))
 			goto use_toc;
 	}
 
@@ -2325,9 +2322,8 @@ use_toc:
 }
 
 /* return the next writable block. also for udf file system. */
-int cdrom_get_next_writable(kdev_t dev, long *next_writable)
+static int cdrom_get_next_writable(struct cdrom_device_info *cdi, long *next_writable)
 {
-	struct cdrom_device_info *cdi = cdrom_find_device(dev);
 	disc_information di;
 	track_information ti;
 	__u16 last_track;
@@ -2336,17 +2332,17 @@ int cdrom_get_next_writable(kdev_t dev, long *next_writable)
 	if (!CDROM_CAN(CDC_GENERIC_PACKET))
 		goto use_last_written;
 
-	if ((ret = cdrom_get_disc_info(dev, &di)))
+	if ((ret = cdrom_get_disc_info(cdi, &di)))
 		goto use_last_written;
 
 	last_track = (di.last_track_msb << 8) | di.last_track_lsb;
-	if ((ret = cdrom_get_track_info(dev, last_track, 1, &ti)))
+	if ((ret = cdrom_get_track_info(cdi, last_track, 1, &ti)))
 		goto use_last_written;
 
         /* if this track is blank, try the previous. */
 	if (ti.blank) {
 		last_track--;
-		if ((ret = cdrom_get_track_info(dev, last_track, 1, &ti)))
+		if ((ret = cdrom_get_track_info(cdi, last_track, 1, &ti)))
 			goto use_last_written;
 	}
 
@@ -2359,7 +2355,7 @@ int cdrom_get_next_writable(kdev_t dev, long *next_writable)
 	return 0;
 
 use_last_written:
-	if ((ret = cdrom_get_last_written(dev, next_writable))) {
+	if ((ret = cdrom_get_last_written(cdi, next_writable))) {
 		*next_writable = 0;
 		return ret;
 	} else {
@@ -2368,11 +2364,7 @@ use_last_written:
 	}
 }
 
-EXPORT_SYMBOL(cdrom_get_disc_info);
-EXPORT_SYMBOL(cdrom_get_track_info);
-EXPORT_SYMBOL(cdrom_get_next_writable);
 EXPORT_SYMBOL(cdrom_get_last_written);
-EXPORT_SYMBOL(cdrom_count_tracks);
 EXPORT_SYMBOL(register_cdrom);
 EXPORT_SYMBOL(unregister_cdrom);
 EXPORT_SYMBOL(cdrom_open);
@@ -2380,11 +2372,9 @@ EXPORT_SYMBOL(cdrom_release);
 EXPORT_SYMBOL(cdrom_ioctl);
 EXPORT_SYMBOL(cdrom_media_changed);
 EXPORT_SYMBOL(cdrom_number_of_slots);
-EXPORT_SYMBOL(cdrom_select_disc);
 EXPORT_SYMBOL(cdrom_mode_select);
 EXPORT_SYMBOL(cdrom_mode_sense);
 EXPORT_SYMBOL(init_cdrom_command);
-EXPORT_SYMBOL(cdrom_find_device);
 
 #ifdef CONFIG_SYSCTL
 
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 104c928813a4..0e0388efd705 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1937,9 +1937,9 @@ static int cdrom_read_tocentry(struct ata_device *drive, int trackno, int msf_fl
 /* Try to read the entire TOC for the disk into our internal buffer. */
 static int cdrom_read_toc(struct ata_device *drive, struct request_sense *sense)
 {
-	int minor, stat, ntracks, i;
-	kdev_t dev;
+	int stat, ntracks, i;
 	struct cdrom_info *info = drive->driver_data;
+	struct cdrom_device_info *cdi = &info->devinfo;
 	struct atapi_toc *toc = info->toc;
 	struct {
 		struct atapi_toc_header hdr;
@@ -2071,10 +2071,8 @@ static int cdrom_read_toc(struct ata_device *drive, struct request_sense *sense)
 	toc->xa_flag = (ms_tmp.hdr.first_track != ms_tmp.hdr.last_track);
 
 	/* Now try to get the total cdrom capacity. */
-	minor = (drive->select.b.unit) << PARTN_BITS;
-	dev = mk_kdev(drive->channel->major, minor);
 	/* FIXME: This is making worng assumptions about register layout. */
-	stat = cdrom_get_last_written(dev, (unsigned long *) &toc->capacity);
+	stat = cdrom_get_last_written(cdi, (unsigned long *) &toc->capacity);
 	if (stat)
 		stat = cdrom_read_capacity(drive, &toc->capacity, sense);
 	if (stat)
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 0e28dc69652b..4b06fa27f4a3 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -522,8 +522,8 @@ void get_sectorsize(int i)
 		SCp->needs_sector_size = 1;
 	} else {
 #if 0
-		if (cdrom_get_last_written(mkdev(MAJOR_NR, i),
-					   &scsi_CDs[i].capacity))
+		if (cdrom_get_last_written(&SCp->cdi,
+					   &SCp->capacity))
 #endif
 			SCp->capacity = 1 + ((buffer[0] << 24) |
 						    (buffer[1] << 16) |
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index b3a349fc341d..c73c3c374e5d 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -805,11 +805,8 @@ typedef struct {
     long error;
 } tracktype;
 
-extern void cdrom_count_tracks(struct cdrom_device_info *cdi,tracktype* tracks);
-extern int cdrom_get_next_writable(kdev_t dev, long *next_writable);
-extern int cdrom_get_last_written(kdev_t dev, long *last_written);
+extern int cdrom_get_last_written(struct cdrom_device_info *cdi, long *last_written);
 extern int cdrom_number_of_slots(struct cdrom_device_info *cdi);
-extern int cdrom_select_disc(struct cdrom_device_info *cdi, int slot);
 extern int cdrom_mode_select(struct cdrom_device_info *cdi,
 			     struct cdrom_generic_command *cgc);
 extern int cdrom_mode_sense(struct cdrom_device_info *cdi,
@@ -817,7 +814,6 @@ extern int cdrom_mode_sense(struct cdrom_device_info *cdi,
 			    int page_code, int page_control);
 extern void init_cdrom_command(struct cdrom_generic_command *cgc,
 			       void *buffer, int len, int type);
-extern struct cdrom_device_info *cdrom_find_device(kdev_t dev);
 
 typedef struct {
 	__u16 disc_information_length;
@@ -901,10 +897,6 @@ typedef struct {
 	__u32 last_rec_address;
 } track_information;
 
-extern int cdrom_get_disc_info(kdev_t dev, disc_information *di);
-extern int cdrom_get_track_info(kdev_t dev, __u16 track, __u8 type,
-				track_information *ti);
-
 /* The SCSI spec says there could be 256 slots. */
 #define CDROM_MAX_SLOTS	256
 
-- 
cgit v1.2.3


From a99f1593786718442ab84cb3a3f6ae34af41b7b4 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Thu, 4 Jul 2002 08:53:28 -0700
Subject: [PATCH] kdev_t crapectomy

	* since the last caller of is_read_only() is gone, the function
itself is removed.
	* destroy_buffers() is not used anymore; gone.
	* fsync_dev() is gone; the only user is the (broken) lvm.c, and the
first step in fixing lvm.c will consist of propagating struct block_device *
anyway; at that point we'll just use fsync_bdev() in there.
	* prototype of bio_ioctl() removed - function doesn't exist
anymore.
---
 Documentation/filesystems/porting | 18 ++++++++++++++++++
 drivers/block/ll_rw_blk.c         | 16 +++++++---------
 fs/buffer.c                       | 16 ----------------
 include/linux/bio.h               |  2 --
 include/linux/buffer_head.h       |  2 --
 include/linux/fs.h                |  1 -
 kernel/ksyms.c                    |  2 --
 7 files changed, 25 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 85281b6f4ff0..18daaaeb5d45 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -228,3 +228,21 @@ anything from oops to silent memory corruption.
 	Use bdev_read_only(bdev) instead of is_read_only(kdev).  The latter
 is still alive, but only because of the mess in drivers/s390/block/dasd.c.
 As soon as it gets fixed is_read_only() will die.
+
+---
+[mandatory]
+
+	is_read_only() is gone; use bdev_read_only() instead.
+
+---
+[mandatory]
+
+	destroy_buffers() is gone; use invalidate_bdev().
+
+---
+[mandatory]
+
+	fsync_dev() is gone; use fsync_bdev().  NOTE: lvm breakage is
+deliberate; as soon as struct block_device * is propagated in a reasonable
+way by that code, fixing it will become trivial; until then nothing can be
+done.
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 318ff55529fb..c07fdf04013f 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -1212,21 +1212,19 @@ void blk_put_request(struct request *rq)
 
 static long ro_bits[MAX_BLKDEV][8];
 
-int is_read_only(kdev_t dev)
+int bdev_read_only(struct block_device *bdev)
 {
 	int minor,major;
 
-	major = major(dev);
-	minor = minor(dev);
-	if (major < 0 || major >= MAX_BLKDEV) return 0;
+	if (!bdev)
+		return 0;
+	major = MAJOR(bdev->bd_dev);
+	minor = MINOR(bdev->bd_dev);
+	if (major < 0 || major >= MAX_BLKDEV)
+		return 0;
 	return ro_bits[major][minor >> 5] & (1 << (minor & 31));
 }
 
-int bdev_read_only(struct block_device *bdev)
-{
-	return bdev && is_read_only(to_kdev_t(bdev->bd_dev));
-}
-
 void set_device_ro(kdev_t dev,int flag)
 {
 	int minor,major;
diff --git a/fs/buffer.c b/fs/buffer.c
index dde8e7d9bae6..02760245c846 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -243,22 +243,6 @@ int fsync_bdev(struct block_device *bdev)
 	return sync_blockdev(bdev);
 }
 
-/*
- * Write out and wait upon all dirty data associated with this
- * kdev_t.   Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_dev(kdev_t dev)
-{
-	struct block_device *bdev = bdget(kdev_t_to_nr(dev));
-	if (bdev) {
-		int res = fsync_bdev(bdev);
-		bdput(bdev);
-		return res;
-	}
-	return 0;
-}
-
 /*
  * sync everything.
  */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ffc38fca9c1e..1b4004652cde 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -203,8 +203,6 @@ extern struct bio *bio_copy(struct bio *, int, int);
 
 extern inline void bio_init(struct bio *);
 
-extern int bio_ioctl(kdev_t, unsigned int, unsigned long);
-
 #ifdef CONFIG_HIGHMEM
 /*
  * remember to add offset! and never ever reenable interrupts between a
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 4fc6bab55825..e0eb4119b250 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -121,7 +121,6 @@ BUFFER_FNS(Boundary, boundary)
 #define page_has_buffers(page)	PagePrivate(page)
 
 #define invalidate_buffers(dev)	__invalidate_buffers((dev), 0)
-#define destroy_buffers(dev)	__invalidate_buffers((dev), 1)
 
 
 /*
@@ -156,7 +155,6 @@ int sync_blockdev(struct block_device *bdev);
 void __wait_on_buffer(struct buffer_head *);
 void sleep_on_buffer(struct buffer_head *bh);
 void wake_up_buffer(struct buffer_head *bh);
-int fsync_dev(kdev_t);
 int fsync_bdev(struct block_device *);
 int fsync_super(struct super_block *);
 int fsync_no_super(struct block_device *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 322e644060cf..1292fc4474cc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1220,7 +1220,6 @@ extern void ll_rw_block(int, int, struct buffer_head * bh[]);
 extern int submit_bh(int, struct buffer_head *);
 struct bio;
 extern int submit_bio(int, struct bio *);
-extern int is_read_only(kdev_t);
 extern int bdev_read_only(struct block_device *);
 extern int set_blocksize(struct block_device *, int);
 extern int sb_set_blocksize(struct super_block *, int);
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index cbf06ff3725b..a128720c1b93 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -185,7 +185,6 @@ EXPORT_SYMBOL(invalidate_inodes);
 EXPORT_SYMBOL(invalidate_device);
 EXPORT_SYMBOL(invalidate_inode_pages);
 EXPORT_SYMBOL(truncate_inode_pages);
-EXPORT_SYMBOL(fsync_dev);
 EXPORT_SYMBOL(fsync_bdev);
 EXPORT_SYMBOL(permission);
 EXPORT_SYMBOL(vfs_permission);
@@ -329,7 +328,6 @@ EXPORT_SYMBOL(tty_std_termios);
 /* block device driver support */
 EXPORT_SYMBOL(blk_size);
 EXPORT_SYMBOL(blk_dev);
-EXPORT_SYMBOL(is_read_only);
 EXPORT_SYMBOL(bdev_read_only);
 EXPORT_SYMBOL(set_device_ro);
 EXPORT_SYMBOL(bmap);
-- 
cgit v1.2.3


From 480f41061c3442a39824591c4767ab6bdbe860f9 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Thu, 4 Jul 2002 08:53:33 -0700
Subject: [PATCH] raid kdev_t cleanups (part 1)

	* ->error_handler() switched to struct block_device *.
	* md_sync_acct() switched to struct block_device *.
	* raid5 struct disk_info ->dev is gone - we use ->bdev everywhere.
	* a bunch of kdev_same() calls are removed from drivers/md/*.c where we
have the corresponding struct block_device * and can simply compare those.
---
 drivers/md/md.c            |  5 ++--
 drivers/md/multipath.c     |  6 ++---
 drivers/md/raid1.c         |  8 +++---
 drivers/md/raid5.c         | 63 ++++++++++++++++++++++------------------------
 include/linux/raid/md.h    |  2 +-
 include/linux/raid/md_k.h  |  2 +-
 include/linux/raid/raid5.h |  1 -
 7 files changed, 42 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 741ecff32e57..433b3c578cb6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2924,7 +2924,7 @@ int md_error(mddev_t *mddev, struct block_device *bdev)
 	if (!rrdev || rrdev->faulty)
 		return 0;
 	if (!mddev->pers->error_handler
-			|| mddev->pers->error_handler(mddev,rdev) <= 0) {
+			|| mddev->pers->error_handler(mddev,bdev) <= 0) {
 		rrdev->faulty = 1;
 	} else
 		return 1;
@@ -3136,8 +3136,9 @@ mdp_disk_t *get_spare(mddev_t *mddev)
 }
 
 static unsigned int sync_io[DK_MAX_MAJOR][DK_MAX_DISK];
-void md_sync_acct(kdev_t dev, unsigned long nr_sectors)
+void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
 {
+	kdev_t dev = to_kdev_t(bdev->bd_dev);
 	unsigned int major = major(dev);
 	unsigned int index;
 
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 32dc200aee66..87cfa0735212 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -320,7 +320,7 @@ static void mark_disk_bad (mddev_t *mddev, int failed)
 /*
  * Careful, this can execute in IRQ contexts as well!
  */
-static int multipath_error (mddev_t *mddev, kdev_t dev)
+static int multipath_error (mddev_t *mddev, struct block_device *bdev)
 {
 	multipath_conf_t *conf = mddev_to_conf(mddev);
 	struct multipath_info * multipaths = conf->multipaths;
@@ -345,7 +345,7 @@ static int multipath_error (mddev_t *mddev, kdev_t dev)
 		 * which has just failed.
 		 */
 		for (i = 0; i < disks; i++) {
-			if (kdev_same(multipaths[i].dev, dev) && !multipaths[i].operational)
+			if (multipaths[i].bdev == bdev && !multipaths[i].operational)
 				return 0;
 		}
 		printk (LAST_DISK);
@@ -354,7 +354,7 @@ static int multipath_error (mddev_t *mddev, kdev_t dev)
 		 * Mark disk as unusable
 		 */
 		for (i = 0; i < disks; i++) {
-			if (kdev_same(multipaths[i].dev,dev) && multipaths[i].operational) {
+			if (multipaths[i].bdev == bdev && multipaths[i].operational) {
 				mark_disk_bad(mddev, i);
 				break;
 			}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 4c855576f9fe..b996922b3347 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -592,7 +592,7 @@ static void mark_disk_bad(mddev_t *mddev, int failed)
 	printk(DISK_FAILED, partition_name(mirror->dev), conf->working_disks);
 }
 
-static int error(mddev_t *mddev, kdev_t dev)
+static int error(mddev_t *mddev, struct block_device *bdev)
 {
 	conf_t *conf = mddev_to_conf(mddev);
 	mirror_info_t * mirrors = conf->mirrors;
@@ -607,7 +607,7 @@ static int error(mddev_t *mddev, kdev_t dev)
 	 * else mark the drive as failed
 	 */
 	for (i = 0; i < disks; i++)
-		if (kdev_same(mirrors[i].dev, dev) && mirrors[i].operational)
+		if (mirrors[i].bdev == bdev && mirrors[i].operational)
 			break;
 	if (i == disks)
 		return 0;
@@ -1045,7 +1045,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 		if (!mbio)
 			continue;
 
-		md_sync_acct(to_kdev_t(mbio->bi_bdev->bd_dev), mbio->bi_size >> 9);
+		md_sync_acct(mbio->bi_bdev, mbio->bi_size >> 9);
 		generic_make_request(mbio);
 		atomic_inc(&conf->mirrors[i].nr_pending);
 	}
@@ -1217,7 +1217,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
 		BUG();
 	r1_bio->read_bio = read_bio;
 
-	md_sync_acct(to_kdev_t(read_bio->bi_bdev->bd_dev), nr_sectors);
+	md_sync_acct(read_bio->bi_bdev, nr_sectors);
 
 	generic_make_request(read_bio);
 	atomic_inc(&conf->mirrors[conf->last_used].nr_pending);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index f19d8d936f44..7e3581804db1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -439,8 +439,9 @@ static void raid5_build_block (struct stripe_head *sh, int i)
 		dev->sector = compute_blocknr(sh, i);
 }
 
-static int error (mddev_t *mddev, kdev_t dev)
+static int error(mddev_t *mddev, struct block_device *bdev)
 {
+	kdev_t dev = to_kdev_t(bdev->bd_dev);
 	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
 	mdp_super_t *sb = mddev->sb;
 	struct disk_info *disk;
@@ -449,33 +450,33 @@ static int error (mddev_t *mddev, kdev_t dev)
 	PRINTK("raid5: error called\n");
 
 	for (i = 0, disk = conf->disks; i < conf->raid_disks; i++, disk++) {
-		if (kdev_same(disk->dev, dev)) {
-			if (disk->operational) {
-				disk->operational = 0;
-				mark_disk_faulty(sb->disks+disk->number);
-				mark_disk_nonsync(sb->disks+disk->number);
-				mark_disk_inactive(sb->disks+disk->number);
-				sb->active_disks--;
-				sb->working_disks--;
-				sb->failed_disks++;
-				mddev->sb_dirty = 1;
-				conf->working_disks--;
-				conf->failed_disks++;
-				md_wakeup_thread(conf->thread);
-				printk (KERN_ALERT
-					"raid5: Disk failure on %s, disabling device."
-					" Operation continuing on %d devices\n",
-					partition_name (dev), conf->working_disks);
-			}
-			return 0;
+		if (disk->bdev != bdev)
+			continue;
+		if (disk->operational) {
+			disk->operational = 0;
+			mark_disk_faulty(sb->disks+disk->number);
+			mark_disk_nonsync(sb->disks+disk->number);
+			mark_disk_inactive(sb->disks+disk->number);
+			sb->active_disks--;
+			sb->working_disks--;
+			sb->failed_disks++;
+			mddev->sb_dirty = 1;
+			conf->working_disks--;
+			conf->failed_disks++;
+			md_wakeup_thread(conf->thread);
+			printk (KERN_ALERT
+				"raid5: Disk failure on %s, disabling device."
+				" Operation continuing on %d devices\n",
+				partition_name (dev), conf->working_disks);
 		}
+		return 0;
 	}
 	/*
 	 * handle errors in spares (during reconstruction)
 	 */
 	if (conf->spare) {
 		disk = conf->spare;
-		if (kdev_same(disk->dev, dev)) {
+		if (disk->bdev == bdev) {
 			printk (KERN_ALERT
 				"raid5: Disk failure on spare %s\n",
 				partition_name (dev));
@@ -1017,7 +1018,7 @@ static void handle_stripe(struct stripe_head *sh)
 					locked++;
 					PRINTK("Reading block %d (sync=%d)\n", i, syncing);
 					if (syncing)
-						md_sync_acct(conf->disks[i].dev, STRIPE_SECTORS);
+						md_sync_acct(conf->disks[i].bdev, STRIPE_SECTORS);
 				}
 			}
 		}
@@ -1156,9 +1157,9 @@ static void handle_stripe(struct stripe_head *sh)
 			locked++;
 			set_bit(STRIPE_INSYNC, &sh->state);
 			if (conf->disks[failed_num].operational)
-				md_sync_acct(conf->disks[failed_num].dev, STRIPE_SECTORS);
+				md_sync_acct(conf->disks[failed_num].bdev, STRIPE_SECTORS);
 			else if ((spare=conf->spare))
-				md_sync_acct(spare->dev, STRIPE_SECTORS);
+				md_sync_acct(spare->bdev, STRIPE_SECTORS);
 
 		}
 	}
@@ -1435,7 +1436,6 @@ static int run (mddev_t *mddev)
 			}
 			disk->number = desc->number;
 			disk->raid_disk = raid_disk;
-			disk->dev = rdev->dev;
 			disk->bdev = rdev->bdev;
 
 			disk->operational = 0;
@@ -1462,7 +1462,6 @@ static int run (mddev_t *mddev)
 	
 			disk->number = desc->number;
 			disk->raid_disk = raid_disk;
-			disk->dev = rdev->dev;
 			disk->bdev = rdev->bdev;
 			disk->operational = 1;
 			disk->used_slot = 1;
@@ -1475,7 +1474,6 @@ static int run (mddev_t *mddev)
 			printk(KERN_INFO "raid5: spare disk %s\n", partition_name(rdev->dev));
 			disk->number = desc->number;
 			disk->raid_disk = raid_disk;
-			disk->dev = rdev->dev;
 			disk->bdev = rdev->bdev;
 
 			disk->operational = 0;
@@ -1495,7 +1493,6 @@ static int run (mddev_t *mddev)
 
 			disk->number = desc->number;
 			disk->raid_disk = raid_disk;
-			disk->dev = NODEV;
 			disk->bdev = NULL;
 
 			disk->operational = 0;
@@ -1691,7 +1688,9 @@ static void print_raid5_conf (raid5_conf_t *conf)
 		printk(" disk %d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n",
 			i, tmp->spare,tmp->operational,
 			tmp->number,tmp->raid_disk,tmp->used_slot,
-			partition_name(tmp->dev));
+			partition_name(tmp->bdev ?
+					to_kdev_t(tmp->bdev->bd_dev):
+					NODEV));
 	}
 }
 
@@ -1903,7 +1902,7 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
 
 		*d = failed_desc;
 
-		if (kdev_none(sdisk->dev))
+		if (!sdisk->bdev)
 			sdisk->used_slot = 0;
 
 		/*
@@ -1931,7 +1930,6 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
 			err = 1;
 			goto abort;
 		}
-		rdisk->dev = NODEV;
 		rdisk->bdev = NULL;
 		rdisk->used_slot = 0;
 
@@ -1949,9 +1947,8 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
 
 		adisk->number = added_desc->number;
 		adisk->raid_disk = added_desc->raid_disk;
-		adisk->dev = mk_kdev(added_desc->major,added_desc->minor);
 		/* it will be held open by rdev */
-		adisk->bdev = bdget(kdev_t_to_nr(adisk->dev));
+		adisk->bdev = bdget(MKDEV(added_desc->major,added_desc->minor));
 
 		adisk->operational = 0;
 		adisk->write_only = 0;
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index 19ebeb0a4988..6dfe08ccfbfe 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -77,7 +77,7 @@ extern void md_wakeup_thread(mdk_thread_t *thread);
 extern void md_interrupt_thread (mdk_thread_t *thread);
 extern void md_update_sb (mddev_t *mddev);
 extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
-extern void md_sync_acct(kdev_t dev, unsigned long nr_sectors);
+extern void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors);
 extern int md_error (mddev_t *mddev, struct block_device *bdev);
 extern int md_run_setup(void);
 
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index e238bd222ea2..69282d99a9eb 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -213,7 +213,7 @@ struct mdk_personality_s
 	int (*run)(mddev_t *mddev);
 	int (*stop)(mddev_t *mddev);
 	int (*status)(char *page, mddev_t *mddev);
-	int (*error_handler)(mddev_t *mddev, kdev_t dev);
+	int (*error_handler)(mddev_t *mddev, struct block_device *bdev);
 
 /*
  * Some personalities (RAID-1, RAID-5) can have disks hot-added and
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index e14864259ffd..7f8beb8acdaa 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -192,7 +192,6 @@ struct stripe_head {
  
 
 struct disk_info {
-	kdev_t	dev;
 	struct block_device *bdev;
 	int	operational;
 	int	number;
-- 
cgit v1.2.3


From f3ddcd6baa20805fea6fe560471f91b0b24fa2e5 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Thu, 4 Jul 2002 08:53:39 -0700
Subject: [PATCH] raid ->diskop() splitup

	* ->diskop() split into individual methods; prototypes cleaned
up.  In particular, ->hot_add_disk() now gets the mdk_rdev_t * of the
component being added as an argument instead of playing games with
major/minor.  Code cleaned up.
---
 drivers/md/md.c           |  29 ++--
 drivers/md/multipath.c    | 423 ++++++++++++++++++++++----------------------
 drivers/md/raid1.c        | 424 ++++++++++++++++++++++----------------------
 drivers/md/raid5.c        | 434 +++++++++++++++++++++-------------------------
 fs/partitions/check.c     |   7 +-
 include/linux/raid/md_k.h |  27 +--
 6 files changed, 632 insertions(+), 712 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 433b3c578cb6..c050810ab535 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1741,8 +1741,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
 			md_unregister_thread(mddev->sync_thread);
 			mddev->sync_thread = NULL;
 			if (mddev->spare) {
-				mddev->pers->diskop(mddev, &mddev->spare,
-						    DISKOP_SPARE_INACTIVE);
+				mddev->pers->spare_inactive(mddev);
 				mddev->spare = NULL;
 			}
 		}
@@ -2250,7 +2249,7 @@ static int hot_remove_disk(mddev_t * mddev, kdev_t dev)
 	printk(KERN_INFO "md: trying to remove %s from md%d ... \n",
 		partition_name(dev), mdidx(mddev));
 
-	if (!mddev->pers->diskop) {
+	if (!mddev->pers->hot_remove_disk) {
 		printk(KERN_WARNING "md%d: personality does not support diskops!\n",
 		       mdidx(mddev));
 		return -EINVAL;
@@ -2274,7 +2273,7 @@ static int hot_remove_disk(mddev_t * mddev, kdev_t dev)
 		return -EINVAL;
 	}
 
-	err = mddev->pers->diskop(mddev, &disk, DISKOP_HOT_REMOVE_DISK);
+	err = mddev->pers->hot_remove_disk(mddev, disk->number);
 	if (err == -EBUSY) {
 		MD_BUG();
 		goto busy;
@@ -2308,7 +2307,7 @@ static int hot_add_disk(mddev_t * mddev, kdev_t dev)
 	printk(KERN_INFO "md: trying to hot-add %s to md%d ... \n",
 		partition_name(dev), mdidx(mddev));
 
-	if (!mddev->pers->diskop) {
+	if (!mddev->pers->hot_add_disk) {
 		printk(KERN_WARNING "md%d: personality does not support diskops!\n",
 		       mdidx(mddev));
 		return -EINVAL;
@@ -2388,7 +2387,7 @@ static int hot_add_disk(mddev_t * mddev, kdev_t dev)
 	disk->major = major(dev);
 	disk->minor = minor(dev);
 
-	if (mddev->pers->diskop(mddev, &disk, DISKOP_HOT_ADD_DISK)) {
+	if (mddev->pers->hot_add_disk(mddev, disk, rdev)) {
 		MD_BUG();
 		err = -EINVAL;
 		goto abort_unbind_export;
@@ -3370,7 +3369,7 @@ void md_do_recovery(void *data)
 
 	ITERATE_MDDEV(mddev,tmp) if (mddev_lock(mddev)==0) {
 		sb = mddev->sb;
-		if (!sb || !mddev->pers || !mddev->pers->diskop || mddev->ro)
+		if (!sb || !mddev->pers || mddev->ro)
 			goto unlock;
 		if (mddev->recovery_running > 0)
 			/* resync/recovery still happening */
@@ -3384,16 +3383,19 @@ void md_do_recovery(void *data)
 				 * If we were doing a reconstruction,
 				 * we need to retrieve the spare
 				 */
+				if (!mddev->pers->spare_inactive)
+					goto unlock;
 				if (mddev->spare) {
-					mddev->pers->diskop(mddev, &mddev->spare,
-							    DISKOP_SPARE_INACTIVE);
+					mddev->pers->spare_inactive(mddev);
 					mddev->spare = NULL;
 				}
 			} else {
+				if (!mddev->pers->spare_active)
+					goto unlock;
 				/* success...*/
 				if (mddev->spare) {
-					mddev->pers->diskop(mddev, &mddev->spare,
-							    DISKOP_SPARE_ACTIVE);
+					mddev->pers->spare_active(mddev,
+								&mddev->spare);
 					mark_disk_sync(mddev->spare);
 					mark_disk_active(mddev->spare);
 					sb->active_disks++;
@@ -3432,12 +3434,13 @@ void md_do_recovery(void *data)
 			if (!mddev->sync_thread) {
 				printk(KERN_ERR "md%d: could not start resync thread...\n", mdidx(mddev));
 				if (mddev->spare)
-					mddev->pers->diskop(mddev, &mddev->spare, DISKOP_SPARE_INACTIVE);
+					mddev->pers->spare_inactive(mddev);
 				mddev->spare = NULL;
 				mddev->recovery_running = 0;
 			} else {
 				if (mddev->spare)
-					mddev->pers->diskop(mddev, &mddev->spare, DISKOP_SPARE_WRITE);
+					mddev->pers->spare_write(mddev,
+						mddev->spare->number);
 				mddev->recovery_running = 1;
 				md_wakeup_thread(mddev->sync_thread);
 			}
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 87cfa0735212..97e6fc1d52de 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -55,9 +55,8 @@ static mdk_personality_t multipath_personality;
 static spinlock_t retry_list_lock = SPIN_LOCK_UNLOCKED;
 struct multipath_bh *multipath_retry_list = NULL, **multipath_retry_tail;
 
-static int multipath_diskop(mddev_t *mddev, mdp_disk_t **d, int state);
-
-
+static int multipath_spare_write(mddev_t *, int);
+static int multipath_spare_active(mddev_t *mddev, mdp_disk_t **d);
 
 static struct multipath_bh *multipath_alloc_mpbh(multipath_conf_t *conf)
 {
@@ -366,11 +365,11 @@ static int multipath_error (mddev_t *mddev, struct block_device *bdev)
 
 			spare = get_spare(mddev);
 			if (spare) {
-				err = multipath_diskop(mddev, &spare, DISKOP_SPARE_WRITE);
+				err = multipath_spare_write(mddev, spare->number);
 				printk("got DISKOP_SPARE_WRITE err: %d. (spare_faulty(): %d)\n", err, disk_faulty(spare));
 			}
 			if (!err && !disk_faulty(spare)) {
-				multipath_diskop(mddev, &spare, DISKOP_SPARE_ACTIVE);
+				multipath_spare_active(mddev, &spare);
 				mark_disk_sync(spare);
 				mark_disk_active(spare);
 				sb->active_disks++;
@@ -410,255 +409,238 @@ static void print_multipath_conf (multipath_conf_t *conf)
 	}
 }
 
-static int multipath_diskop(mddev_t *mddev, mdp_disk_t **d, int state)
+/*
+ * Find the spare disk ... (can only be in the 'high' area of the array)
+ */
+static struct multipath_info *find_spare(mddev_t *mddev, int number)
 {
-	int err = 0;
-	int i, failed_disk=-1, spare_disk=-1, removed_disk=-1, added_disk=-1;
 	multipath_conf_t *conf = mddev->private;
-	struct multipath_info *tmp, *sdisk, *fdisk, *rdisk, *adisk;
-	mdp_super_t *sb = mddev->sb;
-	mdp_disk_t *failed_desc, *spare_desc, *added_desc;
-	mdk_rdev_t *spare_rdev, *failed_rdev;
-	struct block_device *bdev;
+	int i;
+	for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
+		struct multipath_info *p = conf->multipaths + i;
+		if (p->spare && p->number == number)
+			return p;
+	}
+	return NULL;
+}
+
+static int multipath_spare_inactive(mddev_t *mddev)
+{
+	multipath_conf_t *conf = mddev->private;
+	struct multipath_info *p;
+	int err = 0;
 
 	print_multipath_conf(conf);
 	spin_lock_irq(&conf->device_lock);
-	/*
-	 * find the disk ...
-	 */
-	switch (state) {
-
-	case DISKOP_SPARE_ACTIVE:
+	p = find_spare(mddev, mddev->spare->number);
+	if (p) {
+		p->operational = 0;
+	} else {
+		MD_BUG();
+		err = 1;
+	}
+	spin_unlock_irq(&conf->device_lock);
 
-		/*
-		 * Find the failed disk within the MULTIPATH configuration ...
-		 * (this can only be in the first conf->working_disks part)
-		 */
-		for (i = 0; i < conf->raid_disks; i++) {
-			tmp = conf->multipaths + i;
-			if ((!tmp->operational && !tmp->spare) ||
-					!tmp->used_slot) {
-				failed_disk = i;
-				break;
-			}
-		}
-		/*
-		 * When we activate a spare disk we _must_ have a disk in
-		 * the lower (active) part of the array to replace. 
-		 */
-		if ((failed_disk == -1) || (failed_disk >= conf->raid_disks)) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-		/* fall through */
+	print_multipath_conf(conf);
+	return err;
+}
 
-	case DISKOP_SPARE_WRITE:
-	case DISKOP_SPARE_INACTIVE:
+static int multipath_spare_write(mddev_t *mddev, int number)
+{
+	multipath_conf_t *conf = mddev->private;
+	struct multipath_info *p;
+	int err = 0;
 
-		/*
-		 * Find the spare disk ... (can only be in the 'high'
-		 * area of the array)
-		 */
-		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
-			tmp = conf->multipaths + i;
-			if (tmp->spare && tmp->number == (*d)->number) {
-				spare_disk = i;
-				break;
-			}
-		}
-		if (spare_disk == -1) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-		break;
-
-	case DISKOP_HOT_REMOVE_DISK:
-
-		for (i = 0; i < MD_SB_DISKS; i++) {
-			tmp = conf->multipaths + i;
-			if (tmp->used_slot && (tmp->number == (*d)->number)) {
-				if (tmp->operational) {
-					printk(KERN_ERR "hot-remove-disk, slot %d is identified to be the requested disk (number %d), but is still operational!\n", i, (*d)->number);
-					err = -EBUSY;
-					goto abort;
-				}
-				removed_disk = i;
-				break;
-			}
-		}
-		if (removed_disk == -1) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-		break;
+	print_multipath_conf(conf);
+	spin_lock_irq(&conf->device_lock);
+	p = find_spare(mddev, number);
+	if (p) {
+		p->operational = 1;
+	} else {
+		MD_BUG();
+		err = 1;
+	}
+	spin_unlock_irq(&conf->device_lock);
 
-	case DISKOP_HOT_ADD_DISK:
+	print_multipath_conf(conf);
+	return err;
+}
 
-		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
-			tmp = conf->multipaths + i;
-			if (!tmp->used_slot) {
-				added_disk = i;
-				break;
-			}
-		}
-		if (added_disk == -1) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-		break;
-	}
+static int multipath_spare_active(mddev_t *mddev, mdp_disk_t **d)
+{
+	int err = 0;
+	int i, failed_disk=-1, spare_disk=-1;
+	multipath_conf_t *conf = mddev->private;
+	struct multipath_info *tmp, *sdisk, *fdisk;
+	mdp_super_t *sb = mddev->sb;
+	mdp_disk_t *failed_desc, *spare_desc;
+	mdk_rdev_t *spare_rdev, *failed_rdev;
 
-	switch (state) {
+	print_multipath_conf(conf);
+	spin_lock_irq(&conf->device_lock);
 	/*
-	 * Switch the spare disk to write-only mode:
+	 * Find the failed disk within the MULTIPATH configuration ...
+	 * (this can only be in the first conf->raid_disks slots)
 	 */
-	case DISKOP_SPARE_WRITE:
-		sdisk = conf->multipaths + spare_disk;
-		sdisk->operational = 1;
-		break;
+	for (i = 0; i < conf->raid_disks; i++) {
+		tmp = conf->multipaths + i;
+		if ((!tmp->operational && !tmp->spare) ||
+				!tmp->used_slot) {
+			failed_disk = i;
+			break;
+		}
+	}
 	/*
-	 * Deactivate a spare disk:
+	 * When we activate a spare disk we _must_ have a disk in
+	 * the lower (active) part of the array to replace.
 	 */
-	case DISKOP_SPARE_INACTIVE:
-		sdisk = conf->multipaths + spare_disk;
-		sdisk->operational = 0;
-		break;
+	if (failed_disk == -1) {
+		MD_BUG();
+		err = 1;
+		goto abort;
+	}
 	/*
-	 * Activate (mark read-write) the (now sync) spare disk,
-	 * which means we switch it's 'raid position' (->raid_disk)
-	 * with the failed disk. (only the first 'conf->nr_disks'
-	 * slots are used for 'real' disks and we must preserve this
-	 * property)
+	 * Find the spare disk ... (can only be in the 'high'
+	 * area of the array)
 	 */
-	case DISKOP_SPARE_ACTIVE:
-		sdisk = conf->multipaths + spare_disk;
-		fdisk = conf->multipaths + failed_disk;
-
-		spare_desc = &sb->disks[sdisk->number];
-		failed_desc = &sb->disks[fdisk->number];
-
-		if (spare_desc != *d) {
-			MD_BUG();
-			err = 1;
-			goto abort;
+	for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
+		tmp = conf->multipaths + i;
+		if (tmp->spare && tmp->number == (*d)->number) {
+			spare_disk = i;
+			break;
 		}
+	}
+	if (spare_disk == -1) {
+		MD_BUG();
+		err = 1;
+		goto abort;
+	}
 
-		if (spare_desc->raid_disk != sdisk->raid_disk) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-			
-		if (sdisk->raid_disk != spare_disk) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
+	sdisk = conf->multipaths + spare_disk;
+	fdisk = conf->multipaths + failed_disk;
 
-		if (failed_desc->raid_disk != fdisk->raid_disk) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
+	spare_desc = &sb->disks[sdisk->number];
+	failed_desc = &sb->disks[fdisk->number];
 
-		if (fdisk->raid_disk != failed_disk) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
+	if (spare_desc != *d || spare_desc->raid_disk != sdisk->raid_disk ||
+	    sdisk->raid_disk != spare_disk || fdisk->raid_disk != failed_disk ||
+	    failed_desc->raid_disk != fdisk->raid_disk) {
+		MD_BUG();
+		err = 1;
+		goto abort;
+	}
 
-		/*
-		 * do the switch finally
-		 */
-		spare_rdev = find_rdev_nr(mddev, spare_desc->number);
-		failed_rdev = find_rdev_nr(mddev, failed_desc->number);
-		xchg_values(spare_rdev->desc_nr, failed_rdev->desc_nr);
-		spare_rdev->alias_device = 0;
-		failed_rdev->alias_device = 1;
+	/*
+	 * do the switch finally
+	 */
+	spare_rdev = find_rdev_nr(mddev, spare_desc->number);
+	failed_rdev = find_rdev_nr(mddev, failed_desc->number);
+	xchg_values(spare_rdev->desc_nr, failed_rdev->desc_nr);
+	spare_rdev->alias_device = 0;
+	failed_rdev->alias_device = 1;
 
-		xchg_values(*spare_desc, *failed_desc);
-		xchg_values(*fdisk, *sdisk);
+	xchg_values(*spare_desc, *failed_desc);
+	xchg_values(*fdisk, *sdisk);
 
-		/*
-		 * (careful, 'failed' and 'spare' are switched from now on)
-		 *
-		 * we want to preserve linear numbering and we want to
-		 * give the proper raid_disk number to the now activated
-		 * disk. (this means we switch back these values)
-		 */
-	
-		xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
-		xchg_values(sdisk->raid_disk, fdisk->raid_disk);
-		xchg_values(spare_desc->number, failed_desc->number);
-		xchg_values(sdisk->number, fdisk->number);
+	/*
+	 * (careful, 'failed' and 'spare' are switched from now on)
+	 *
+	 * we want to preserve linear numbering and we want to
+	 * give the proper raid_disk number to the now activated
+	 * disk. (this means we switch back these values)
+	 */
 
-		*d = failed_desc;
+	xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
+	xchg_values(sdisk->raid_disk, fdisk->raid_disk);
+	xchg_values(spare_desc->number, failed_desc->number);
+	xchg_values(sdisk->number, fdisk->number);
 
-		if (!sdisk->bdev)
-			sdisk->used_slot = 0;
-		/*
-		 * this really activates the spare.
-		 */
-		fdisk->spare = 0;
+	*d = failed_desc;
 
-		/*
-		 * if we activate a spare, we definitely replace a
-		 * non-operational disk slot in the 'low' area of
-		 * the disk array.
-		 */
+	if (!sdisk->bdev)
+		sdisk->used_slot = 0;
+	/*
+	 * this really activates the spare.
+	 */
+	fdisk->spare = 0;
 
-		conf->working_disks++;
+	/*
+	 * if we activate a spare, we definitely replace a
+	 * non-operational disk slot in the 'low' area of
+	 * the disk array.
+	 */
 
-		break;
+	conf->working_disks++;
+abort:
+	spin_unlock_irq(&conf->device_lock);
 
-	case DISKOP_HOT_REMOVE_DISK:
-		rdisk = conf->multipaths + removed_disk;
+	print_multipath_conf(conf);
+	return err;
+}
 
-		if (rdisk->spare && (removed_disk < conf->raid_disks)) {
-			MD_BUG();	
-			err = 1;
-			goto abort;
-		}
-		bdev = rdisk->bdev;
-		rdisk->dev = NODEV;
-		rdisk->bdev = NULL;
-		rdisk->used_slot = 0;
-		conf->nr_disks--;
-		bdput(bdev);
-		break;
-
-	case DISKOP_HOT_ADD_DISK:
-		adisk = conf->multipaths + added_disk;
-		added_desc = *d;
-
-		if (added_disk != added_desc->number) {
-			MD_BUG();	
-			err = 1;
-			goto abort;
+static int multipath_add_disk(mddev_t *mddev, mdp_disk_t *added_desc,
+	mdk_rdev_t *rdev)
+{
+	multipath_conf_t *conf = mddev->private;
+	int err = 1;
+	int i;
+
+	print_multipath_conf(conf);
+	spin_lock_irq(&conf->device_lock);
+	for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
+		struct multipath_info *p = conf->multipaths + i;
+		if (!p->used_slot) {
+			if (added_desc->number != i)
+				break;
+			p->number = added_desc->number;
+			p->raid_disk = added_desc->raid_disk;
+			p->dev = rdev->dev;
+			p->bdev = rdev->bdev;
+			p->operational = 0;
+			p->spare = 1;
+			p->used_slot = 1;
+			conf->nr_disks++;
+			err = 0;
+			break;
 		}
+	}
+	if (err)
+		MD_BUG();
+	spin_unlock_irq(&conf->device_lock);
 
-		adisk->number = added_desc->number;
-		adisk->raid_disk = added_desc->raid_disk;
-		adisk->dev = mk_kdev(added_desc->major,added_desc->minor);
-		/* it will be held open by rdev */
-		adisk->bdev = bdget(kdev_t_to_nr(adisk->dev));
+	print_multipath_conf(conf);
+	return err;
+}
 
-		adisk->operational = 0;
-		adisk->spare = 1;
-		adisk->used_slot = 1;
-		conf->nr_disks++;
+static int multipath_remove_disk(mddev_t *mddev, int number)
+{
+	multipath_conf_t *conf = mddev->private;
+	int err = 1;
+	int i;
 
-		break;
+	print_multipath_conf(conf);
+	spin_lock_irq(&conf->device_lock);
 
-	default:
-		MD_BUG();
-		err = 1;
-		goto abort;
+	for (i = 0; i < MD_SB_DISKS; i++) {
+		struct multipath_info *p = conf->multipaths + i;
+		if (p->used_slot && (p->number == number)) {
+			if (p->operational) {
+				printk(KERN_ERR "hot-remove-disk, slot %d is identified to be the requested disk (number %d), but is still operational!\n", i, number);
+				err = -EBUSY;
+				goto abort;
+			}
+			if (p->spare && i < conf->raid_disks)
+				break;
+			p->dev = NODEV;
+			p->bdev = NULL;
+			p->used_slot = 0;
+			conf->nr_disks--;
+			err = 0;
+			break;
+		}
 	}
+	if (err)
+		MD_BUG();
 abort:
 	spin_unlock_irq(&conf->device_lock);
 
@@ -666,7 +648,6 @@ abort:
 	return err;
 }
 
-
 #define IO_ERROR KERN_ALERT \
 "multipath: %s: unrecoverable IO read error for block %lu\n"
 
@@ -1074,7 +1055,11 @@ static mdk_personality_t multipath_personality=
 	stop:		multipath_stop,
 	status:		multipath_status,
 	error_handler:	multipath_error,
-	diskop:		multipath_diskop,
+	hot_add_disk:	multipath_add_disk,
+	hot_remove_disk:multipath_remove_disk,
+	spare_inactive:	multipath_spare_inactive,
+	spare_active:	multipath_spare_active,
+	spare_write:	multipath_spare_write,
 };
 
 static int __init multipath_init (void)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b996922b3347..76e2de202458 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -658,263 +658,244 @@ static void close_sync(conf_t *conf)
 	conf->r1buf_pool = NULL;
 }
 
-static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
+static mirror_info_t *find_spare(mddev_t *mddev, int number)
+{
+	conf_t *conf = mddev->private;
+	int i;
+	for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
+		mirror_info_t *p = conf->mirrors + i;
+		if (p->spare && p->number == number)
+			return p;
+	}
+	return NULL;
+}
+
+static int raid1_spare_active(mddev_t *mddev, mdp_disk_t **d)
 {
 	int err = 0;
-	int i, failed_disk = -1, spare_disk = -1, removed_disk = -1, added_disk = -1;
+	int i, failed_disk = -1, spare_disk = -1;
 	conf_t *conf = mddev->private;
-	mirror_info_t *tmp, *sdisk, *fdisk, *rdisk, *adisk;
+	mirror_info_t *tmp, *sdisk, *fdisk;
 	mdp_super_t *sb = mddev->sb;
-	mdp_disk_t *failed_desc, *spare_desc, *added_desc;
+	mdp_disk_t *failed_desc, *spare_desc;
 	mdk_rdev_t *spare_rdev, *failed_rdev;
-	struct block_device *bdev;
 
 	print_conf(conf);
 	spin_lock_irq(&conf->device_lock);
 	/*
-	 * find the disk ...
+	 * Find the failed disk within the RAID1 configuration ...
+	 * (this can only be in the first conf->raid_disks slots)
 	 */
-	switch (state) {
-
-	case DISKOP_SPARE_ACTIVE:
-
-		/*
-		 * Find the failed disk within the RAID1 configuration ...
-		 * (this can only be in the first conf->working_disks part)
-		 */
-		for (i = 0; i < conf->raid_disks; i++) {
-			tmp = conf->mirrors + i;
-			if ((!tmp->operational && !tmp->spare) ||
-					!tmp->used_slot) {
-				failed_disk = i;
-				break;
-			}
+	for (i = 0; i < conf->raid_disks; i++) {
+		tmp = conf->mirrors + i;
+		if ((!tmp->operational && !tmp->spare) ||
+				!tmp->used_slot) {
+			failed_disk = i;
+			break;
 		}
-		/*
-		 * When we activate a spare disk we _must_ have a disk in
-		 * the lower (active) part of the array to replace.
-		 */
-		if ((failed_disk == -1) || (failed_disk >= conf->raid_disks)) {
-			MD_BUG();
-			err = 1;
-			goto abort;
+	}
+	/*
+	 * When we activate a spare disk we _must_ have a disk in
+	 * the lower (active) part of the array to replace.
+	 */
+	if (failed_disk == -1) {
+		MD_BUG();
+		err = 1;
+		goto abort;
+	}
+	/*
+	 * Find the spare disk ... (can only be in the 'high'
+	 * area of the array)
+	 */
+	for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
+		tmp = conf->mirrors + i;
+		if (tmp->spare && tmp->number == (*d)->number) {
+			spare_disk = i;
+			break;
 		}
-		/* fall through */
-
-	case DISKOP_SPARE_WRITE:
-	case DISKOP_SPARE_INACTIVE:
+	}
+	if (spare_disk == -1) {
+		MD_BUG();
+		err = 1;
+		goto abort;
+	}
 
-		/*
-		 * Find the spare disk ... (can only be in the 'high'
-		 * area of the array)
-		 */
-		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
-			tmp = conf->mirrors + i;
-			if (tmp->spare && tmp->number == (*d)->number) {
-				spare_disk = i;
-				break;
-			}
-		}
-		if (spare_disk == -1) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-		break;
-
-	case DISKOP_HOT_REMOVE_DISK:
-
-		for (i = 0; i < MD_SB_DISKS; i++) {
-			tmp = conf->mirrors + i;
-			if (tmp->used_slot && (tmp->number == (*d)->number)) {
-				if (tmp->operational) {
-					err = -EBUSY;
-					goto abort;
-				}
-				removed_disk = i;
-				break;
-			}
-		}
-		if (removed_disk == -1) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-		break;
+	sdisk = conf->mirrors + spare_disk;
+	fdisk = conf->mirrors + failed_disk;
 
-	case DISKOP_HOT_ADD_DISK:
+	spare_desc = &sb->disks[sdisk->number];
+	failed_desc = &sb->disks[fdisk->number];
 
-		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
-			tmp = conf->mirrors + i;
-			if (!tmp->used_slot) {
-				added_disk = i;
-				break;
-			}
-		}
-		if (added_disk == -1) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-		break;
+	if (spare_desc != *d || spare_desc->raid_disk != sdisk->raid_disk ||
+	    sdisk->raid_disk != spare_disk || fdisk->raid_disk != failed_disk ||
+	    failed_desc->raid_disk != fdisk->raid_disk) {
+		MD_BUG();
+		err = 1;
+		goto abort;
 	}
 
-	switch (state) {
-	/*
-	 * Switch the spare disk to write-only mode:
-	 */
-	case DISKOP_SPARE_WRITE:
-		sdisk = conf->mirrors + spare_disk;
-		sdisk->operational = 1;
-		sdisk->write_only = 1;
-		break;
 	/*
-	 * Deactivate a spare disk:
+	 * do the switch finally
 	 */
-	case DISKOP_SPARE_INACTIVE:
-		sdisk = conf->mirrors + spare_disk;
-		sdisk->operational = 0;
-		sdisk->write_only = 0;
-		break;
+	spare_rdev = find_rdev_nr(mddev, spare_desc->number);
+	failed_rdev = find_rdev_nr(mddev, failed_desc->number);
+
 	/*
-	 * Activate (mark read-write) the (now sync) spare disk,
-	 * which means we switch it's 'raid position' (->raid_disk)
-	 * with the failed disk. (only the first 'conf->nr_disks'
-	 * slots are used for 'real' disks and we must preserve this
-	 * property)
+	 * There must be a spare_rdev, but there may not be a
+	 * failed_rdev. That slot might be empty...
 	 */
-	case DISKOP_SPARE_ACTIVE:
-		sdisk = conf->mirrors + spare_disk;
-		fdisk = conf->mirrors + failed_disk;
-
-		spare_desc = &sb->disks[sdisk->number];
-		failed_desc = &sb->disks[fdisk->number];
-
-		if (spare_desc != *d) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
+	spare_rdev->desc_nr = failed_desc->number;
+	if (failed_rdev)
+		failed_rdev->desc_nr = spare_desc->number;
 
-		if (spare_desc->raid_disk != sdisk->raid_disk) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-
-		if (sdisk->raid_disk != spare_disk) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
+	xchg_values(*spare_desc, *failed_desc);
+	xchg_values(*fdisk, *sdisk);
 
-		if (failed_desc->raid_disk != fdisk->raid_disk) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-
-		if (fdisk->raid_disk != failed_disk) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
+	/*
+	 * (careful, 'failed' and 'spare' are switched from now on)
+	 *
+	 * we want to preserve linear numbering and we want to
+	 * give the proper raid_disk number to the now activated
+	 * disk. (this means we switch back these values)
+	 */
+	xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
+	xchg_values(sdisk->raid_disk, fdisk->raid_disk);
+	xchg_values(spare_desc->number, failed_desc->number);
+	xchg_values(sdisk->number, fdisk->number);
 
-		/*
-		 * do the switch finally
-		 */
-		spare_rdev = find_rdev_nr(mddev, spare_desc->number);
-		failed_rdev = find_rdev_nr(mddev, failed_desc->number);
+	*d = failed_desc;
 
-		/*
-		 * There must be a spare_rdev, but there may not be a
-		 * failed_rdev. That slot might be empty...
-		 */
-		spare_rdev->desc_nr = failed_desc->number;
-		if (failed_rdev)
-			failed_rdev->desc_nr = spare_desc->number;
+	if (!sdisk->bdev)
+		sdisk->used_slot = 0;
+	/*
+	 * this really activates the spare.
+	 */
+	fdisk->spare = 0;
+	fdisk->write_only = 0;
 
-		xchg_values(*spare_desc, *failed_desc);
-		xchg_values(*fdisk, *sdisk);
+	/*
+	 * if we activate a spare, we definitely replace a
+	 * non-operational disk slot in the 'low' area of
+	 * the disk array.
+	 */
 
-		/*
-		 * (careful, 'failed' and 'spare' are switched from now on)
-		 *
-		 * we want to preserve linear numbering and we want to
-		 * give the proper raid_disk number to the now activated
-		 * disk. (this means we switch back these values)
-		 */
-		xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
-		xchg_values(sdisk->raid_disk, fdisk->raid_disk);
-		xchg_values(spare_desc->number, failed_desc->number);
-		xchg_values(sdisk->number, fdisk->number);
+	conf->working_disks++;
+abort:
+	spin_unlock_irq(&conf->device_lock);
 
-		*d = failed_desc;
+	print_conf(conf);
+	return err;
+}
 
-		if (!sdisk->bdev)
-			sdisk->used_slot = 0;
-		/*
-		 * this really activates the spare.
-		 */
-		fdisk->spare = 0;
-		fdisk->write_only = 0;
+static int raid1_spare_inactive(mddev_t *mddev)
+{
+	conf_t *conf = mddev->private;
+	mirror_info_t *p;
+	int err = 0;
 
-		/*
-		 * if we activate a spare, we definitely replace a
-		 * non-operational disk slot in the 'low' area of
-		 * the disk array.
-		 */
+	print_conf(conf);
+	spin_lock_irq(&conf->device_lock);
+	p = find_spare(mddev, mddev->spare->number);
+	if (p) {
+		p->operational = 0;
+		p->write_only = 0;
+	} else {
+		MD_BUG();
+		err = 1;
+	}
+	spin_unlock_irq(&conf->device_lock);
+	print_conf(conf);
+	return err;
+}
 
-		conf->working_disks++;
+static int raid1_spare_write(mddev_t *mddev, int number)
+{
+	conf_t *conf = mddev->private;
+	mirror_info_t *p;
+	int err = 0;
 
-		break;
+	print_conf(conf);
+	spin_lock_irq(&conf->device_lock);
+	p = find_spare(mddev, number);
+	if (p) {
+		p->operational = 1;
+		p->write_only = 1;
+	} else {
+		MD_BUG();
+		err = 1;
+	}
+	spin_unlock_irq(&conf->device_lock);
+	print_conf(conf);
+	return err;
+}
 
-	case DISKOP_HOT_REMOVE_DISK:
-		rdisk = conf->mirrors + removed_disk;
+static int raid1_add_disk(mddev_t *mddev, mdp_disk_t *added_desc,
+	mdk_rdev_t *rdev)
+{
+	conf_t *conf = mddev->private;
+	int err = 1;
+	int i;
 
-		if (rdisk->spare && (removed_disk < conf->raid_disks)) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-		bdev = rdisk->bdev;
-		rdisk->dev = NODEV;
-		rdisk->bdev = NULL;
-		rdisk->used_slot = 0;
-		conf->nr_disks--;
-		bdput(bdev);
-		break;
-
-	case DISKOP_HOT_ADD_DISK:
-		adisk = conf->mirrors + added_disk;
-		added_desc = *d;
-
-		if (added_disk != added_desc->number) {
-			MD_BUG();
-			err = 1;
-			goto abort;
+	print_conf(conf);
+	spin_lock_irq(&conf->device_lock);
+	/*
+	 * find the first unused slot in the 'high' area of the array ...
+	 */
+	for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
+		mirror_info_t *p = conf->mirrors + i;
+		if (!p->used_slot) {
+			if (added_desc->number != i)
+				break;
+			p->number = added_desc->number;
+			p->raid_disk = added_desc->raid_disk;
+			p->dev = rdev->dev;
+			/* it will be held open by rdev */
+			p->bdev = rdev->bdev;
+			p->operational = 0;
+			p->write_only = 0;
+			p->spare = 1;
+			p->used_slot = 1;
+			p->head_position = 0;
+			conf->nr_disks++;
+			err = 0;
+			break;
 		}
+	}
+	if (err)
+		MD_BUG();
+	spin_unlock_irq(&conf->device_lock);
 
-		adisk->number = added_desc->number;
-		adisk->raid_disk = added_desc->raid_disk;
-		adisk->dev = mk_kdev(added_desc->major, added_desc->minor);
-		/* it will be held open by rdev */
-		adisk->bdev = bdget(kdev_t_to_nr(adisk->dev));
-
-		adisk->operational = 0;
-		adisk->write_only = 0;
-		adisk->spare = 1;
-		adisk->used_slot = 1;
-		adisk->head_position = 0;
-		conf->nr_disks++;
+	print_conf(conf);
+	return err;
+}
 
-		break;
+static int raid1_remove_disk(mddev_t *mddev, int number)
+{
+	conf_t *conf = mddev->private;
+	int err = 1;
+	int i;
 
-	default:
-		MD_BUG();
-		err = 1;
-		goto abort;
+	print_conf(conf);
+	spin_lock_irq(&conf->device_lock);
+	for (i = 0; i < MD_SB_DISKS; i++) {
+		mirror_info_t *p = conf->mirrors + i;
+		if (p->used_slot && (p->number == number)) {
+			if (p->operational) {
+				err = -EBUSY;
+				goto abort;
+			}
+			if (p->spare && (i < conf->raid_disks))
+				break;
+			p->dev = NODEV;
+			p->bdev = NULL;
+			p->used_slot = 0;
+			conf->nr_disks--;
+			err = 0;
+			break;
+		}
 	}
+	if (err)
+		MD_BUG();
 abort:
 	spin_unlock_irq(&conf->device_lock);
 
@@ -922,7 +903,6 @@ abort:
 	return err;
 }
 
-
 #define IO_ERROR KERN_ALERT \
 "raid1: %s: unrecoverable I/O read error for block %lu\n"
 
@@ -1495,7 +1475,11 @@ static mdk_personality_t raid1_personality =
 	stop:		stop,
 	status:		status,
 	error_handler:	error,
-	diskop:		diskop,
+	hot_add_disk:	raid1_add_disk,
+	hot_remove_disk:raid1_remove_disk,
+	spare_write:	raid1_spare_write,
+	spare_inactive:	raid1_spare_inactive,
+	spare_active:	raid1_spare_active,
 	sync_request:	sync_request
 };
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7e3581804db1..5218dfabbc01 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1694,276 +1694,242 @@ static void print_raid5_conf (raid5_conf_t *conf)
 	}
 }
 
-static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
+static struct disk_info *find_spare(mddev_t *mddev, int number)
+{
+	raid5_conf_t *conf = mddev->private;
+	int i;
+	for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
+		struct disk_info *p = conf->disks + i;
+		if (p->spare && p->number == number)
+			return p;
+	}
+	return NULL;
+}
+
+static int raid5_spare_active(mddev_t *mddev, mdp_disk_t **d)
 {
 	int err = 0;
-	int i, failed_disk=-1, spare_disk=-1, removed_disk=-1, added_disk=-1;
+	int i, failed_disk=-1, spare_disk=-1;
 	raid5_conf_t *conf = mddev->private;
-	struct disk_info *tmp, *sdisk, *fdisk, *rdisk, *adisk;
+	struct disk_info *tmp, *sdisk, *fdisk;
 	mdp_super_t *sb = mddev->sb;
-	mdp_disk_t *failed_desc, *spare_desc, *added_desc;
+	mdp_disk_t *failed_desc, *spare_desc;
 	mdk_rdev_t *spare_rdev, *failed_rdev;
 
 	print_raid5_conf(conf);
 	spin_lock_irq(&conf->device_lock);
+	for (i = 0; i < conf->raid_disks; i++) {
+		tmp = conf->disks + i;
+		if ((!tmp->operational && !tmp->spare) ||
+				!tmp->used_slot) {
+			failed_disk = i;
+			break;
+		}
+	}
+	if (failed_disk == -1) {
+		MD_BUG();
+		err = 1;
+		goto abort;
+	}
 	/*
-	 * find the disk ...
+	 * Find the spare disk ... (can only be in the 'high'
+	 * area of the array)
 	 */
-	switch (state) {
-
-	case DISKOP_SPARE_ACTIVE:
-
-		/*
-		 * Find the failed disk within the RAID5 configuration ...
-		 * (this can only be in the first conf->raid_disks part)
-		 */
-		for (i = 0; i < conf->raid_disks; i++) {
-			tmp = conf->disks + i;
-			if ((!tmp->operational && !tmp->spare) ||
-					!tmp->used_slot) {
-				failed_disk = i;
-				break;
-			}
-		}
-		/*
-		 * When we activate a spare disk we _must_ have a disk in
-		 * the lower (active) part of the array to replace.
-		 */
-		if ((failed_disk == -1) || (failed_disk >= conf->raid_disks)) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-		/* fall through */
-
-	case DISKOP_SPARE_WRITE:
-	case DISKOP_SPARE_INACTIVE:
-
-		/*
-		 * Find the spare disk ... (can only be in the 'high'
-		 * area of the array)
-		 */
-		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
-			tmp = conf->disks + i;
-			if (tmp->spare && tmp->number == (*d)->number) {
-				spare_disk = i;
-				break;
-			}
-		}
-		if (spare_disk == -1) {
-			MD_BUG();
-			err = 1;
-			goto abort;
+	for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
+		tmp = conf->disks + i;
+		if (tmp->spare && tmp->number == (*d)->number) {
+			spare_disk = i;
+			break;
 		}
-		break;
-
-	case DISKOP_HOT_REMOVE_DISK:
+	}
+	if (spare_disk == -1) {
+		MD_BUG();
+		err = 1;
+		goto abort;
+	}
 
-		for (i = 0; i < MD_SB_DISKS; i++) {
-			tmp = conf->disks + i;
-			if (tmp->used_slot && (tmp->number == (*d)->number)) {
-				if (tmp->operational) {
-					err = -EBUSY;
-					goto abort;
-				}
-				removed_disk = i;
-				break;
-			}
-		}
-		if (removed_disk == -1) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-		break;
+	if (!conf->spare) {
+		MD_BUG();
+		err = 1;
+		goto abort;
+	}
+	sdisk = conf->disks + spare_disk;
+	fdisk = conf->disks + failed_disk;
 
-	case DISKOP_HOT_ADD_DISK:
+	spare_desc = &sb->disks[sdisk->number];
+	failed_desc = &sb->disks[fdisk->number];
 
-		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
-			tmp = conf->disks + i;
-			if (!tmp->used_slot) {
-				added_disk = i;
-				break;
-			}
-		}
-		if (added_disk == -1) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-		break;
+	if (spare_desc != *d || spare_desc->raid_disk != sdisk->raid_disk ||
+	    sdisk->raid_disk != spare_disk || fdisk->raid_disk != failed_disk ||
+	    failed_desc->raid_disk != fdisk->raid_disk) {
+		MD_BUG();
+		err = 1;
+		goto abort;
 	}
 
-	switch (state) {
 	/*
-	 * Switch the spare disk to write-only mode:
+	 * do the switch finally
 	 */
-	case DISKOP_SPARE_WRITE:
-		if (conf->spare) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-		sdisk = conf->disks + spare_disk;
-		sdisk->operational = 1;
-		sdisk->write_only = 1;
-		conf->spare = sdisk;
-		break;
-	/*
-	 * Deactivate a spare disk:
+	spare_rdev = find_rdev_nr(mddev, spare_desc->number);
+	failed_rdev = find_rdev_nr(mddev, failed_desc->number);
+
+	/* There must be a spare_rdev, but there may not be a
+	 * failed_rdev.  That slot might be empty...
 	 */
-	case DISKOP_SPARE_INACTIVE:
-		sdisk = conf->disks + spare_disk;
-		sdisk->operational = 0;
-		sdisk->write_only = 0;
-		/*
-		 * Was the spare being resynced?
-		 */
-		if (conf->spare == sdisk)
-			conf->spare = NULL;
-		break;
+	spare_rdev->desc_nr = failed_desc->number;
+	if (failed_rdev)
+		failed_rdev->desc_nr = spare_desc->number;
+	
+	xchg_values(*spare_desc, *failed_desc);
+	xchg_values(*fdisk, *sdisk);
+
 	/*
-	 * Activate (mark read-write) the (now sync) spare disk,
-	 * which means we switch it's 'raid position' (->raid_disk)
-	 * with the failed disk. (only the first 'conf->raid_disks'
-	 * slots are used for 'real' disks and we must preserve this
-	 * property)
+	 * (careful, 'failed' and 'spare' are switched from now on)
+	 *
+	 * we want to preserve linear numbering and we want to
+	 * give the proper raid_disk number to the now activated
+	 * disk. (this means we switch back these values)
 	 */
-	case DISKOP_SPARE_ACTIVE:
-		if (!conf->spare) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-		sdisk = conf->disks + spare_disk;
-		fdisk = conf->disks + failed_disk;
 
-		spare_desc = &sb->disks[sdisk->number];
-		failed_desc = &sb->disks[fdisk->number];
+	xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
+	xchg_values(sdisk->raid_disk, fdisk->raid_disk);
+	xchg_values(spare_desc->number, failed_desc->number);
+	xchg_values(sdisk->number, fdisk->number);
 
-		if (spare_desc != *d) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
+	*d = failed_desc;
 
-		if (spare_desc->raid_disk != sdisk->raid_disk) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-			
-		if (sdisk->raid_disk != spare_disk) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
+	if (!sdisk->bdev)
+		sdisk->used_slot = 0;
 
-		if (failed_desc->raid_disk != fdisk->raid_disk) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-
-		if (fdisk->raid_disk != failed_disk) {
-			MD_BUG();
-			err = 1;
-			goto abort;
-		}
-
-		/*
-		 * do the switch finally
-		 */
-		spare_rdev = find_rdev_nr(mddev, spare_desc->number);
-		failed_rdev = find_rdev_nr(mddev, failed_desc->number);
-
-		/* There must be a spare_rdev, but there may not be a
-		 * failed_rdev.  That slot might be empty...
-		 */
-		spare_rdev->desc_nr = failed_desc->number;
-		if (failed_rdev)
-			failed_rdev->desc_nr = spare_desc->number;
-		
-		xchg_values(*spare_desc, *failed_desc);
-		xchg_values(*fdisk, *sdisk);
+	/*
+	 * this really activates the spare.
+	 */
+	fdisk->spare = 0;
+	fdisk->write_only = 0;
 
-		/*
-		 * (careful, 'failed' and 'spare' are switched from now on)
-		 *
-		 * we want to preserve linear numbering and we want to
-		 * give the proper raid_disk number to the now activated
-		 * disk. (this means we switch back these values)
-		 */
-	
-		xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
-		xchg_values(sdisk->raid_disk, fdisk->raid_disk);
-		xchg_values(spare_desc->number, failed_desc->number);
-		xchg_values(sdisk->number, fdisk->number);
+	/*
+	 * if we activate a spare, we definitely replace a
+	 * non-operational disk slot in the 'low' area of
+	 * the disk array.
+	 */
+	conf->failed_disks--;
+	conf->working_disks++;
+	conf->spare = NULL;
+abort:
+	spin_unlock_irq(&conf->device_lock);
+	print_raid5_conf(conf);
+	return err;
+}
 
-		*d = failed_desc;
+static int raid5_spare_inactive(mddev_t *mddev)
+{
+	raid5_conf_t *conf = mddev->private;
+	struct disk_info *p;
+	int err = 0;
 
-		if (!sdisk->bdev)
-			sdisk->used_slot = 0;
+	print_raid5_conf(conf);
+	spin_lock_irq(&conf->device_lock);
+	p = find_spare(mddev, mddev->spare->number);
+	if (p) {
+		p->operational = 0;
+		p->write_only = 0;
+		if (conf->spare == p)
+			conf->spare = NULL;
+	} else {
+		MD_BUG();
+		err = 1;
+	}
+	spin_unlock_irq(&conf->device_lock);
+	print_raid5_conf(conf);
+	return err;
+}
 
-		/*
-		 * this really activates the spare.
-		 */
-		fdisk->spare = 0;
-		fdisk->write_only = 0;
+static int raid5_spare_write(mddev_t *mddev, int number)
+{
+	raid5_conf_t *conf = mddev->private;
+	struct disk_info *p;
+	int err = 0;
 
-		/*
-		 * if we activate a spare, we definitely replace a
-		 * non-operational disk slot in the 'low' area of
-		 * the disk array.
-		 */
-		conf->failed_disks--;
-		conf->working_disks++;
-		conf->spare = NULL;
+	print_raid5_conf(conf);
+	spin_lock_irq(&conf->device_lock);
+	p = find_spare(mddev, number);
+	if (p && !conf->spare) {
+		p->operational = 1;
+		p->write_only = 1;
+		conf->spare = p;
+	} else {
+		MD_BUG();
+		err = 1;
+	}
+	spin_unlock_irq(&conf->device_lock);
+	print_raid5_conf(conf);
+	return err;
+}
 
-		break;
+static int raid5_remove_disk(mddev_t *mddev, int number)
+{
+	raid5_conf_t *conf = mddev->private;
+	int err = 1;
+	int i;
 
-	case DISKOP_HOT_REMOVE_DISK:
-		rdisk = conf->disks + removed_disk;
+	print_raid5_conf(conf);
+	spin_lock_irq(&conf->device_lock);
 
-		if (rdisk->spare && (removed_disk < conf->raid_disks)) {
-			MD_BUG();	
-			err = 1;
-			goto abort;
+	for (i = 0; i < MD_SB_DISKS; i++) {
+		struct disk_info *p = conf->disks + i;
+		if (p->used_slot && p->number == number) {
+			if (p->operational) {
+				err = -EBUSY;
+				goto abort;
+			}
+			if (p->spare && i < conf->raid_disks)
+				break;
+			p->bdev = NULL;
+			p->used_slot = 0;
+			err = 0;
+			break;
 		}
-		rdisk->bdev = NULL;
-		rdisk->used_slot = 0;
+	}
+	if (err)
+		MD_BUG();
+abort:
+	spin_unlock_irq(&conf->device_lock);
+	print_raid5_conf(conf);
+	return err;
+}
 
-		break;
+static int raid5_add_disk(mddev_t *mddev, mdp_disk_t *added_desc,
+	mdk_rdev_t *rdev)
+{
+	raid5_conf_t *conf = mddev->private;
+	int err = 1;
+	int i;
 
-	case DISKOP_HOT_ADD_DISK:
-		adisk = conf->disks + added_disk;
-		added_desc = *d;
+	print_raid5_conf(conf);
+	spin_lock_irq(&conf->device_lock);
+	/*
+	 * find the first unused slot in the 'high' area of the array ...
+	 */
 
-		if (added_disk != added_desc->number) {
-			MD_BUG();	
-			err = 1;
-			goto abort;
+	for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
+		struct disk_info *p = conf->disks + i;
+		if (!p->used_slot) {
+			if (added_desc->number != i)
+				break;
+			p->number = added_desc->number;
+			p->raid_disk = added_desc->raid_disk;
+			/* it will be held open by rdev */
+			p->bdev = rdev->bdev;
+			p->operational = 0;
+			p->write_only = 0;
+			p->spare = 1;
+			p->used_slot = 1;
+			err = 0;
+			break;
 		}
-
-		adisk->number = added_desc->number;
-		adisk->raid_disk = added_desc->raid_disk;
-		/* it will be held open by rdev */
-		adisk->bdev = bdget(MKDEV(added_desc->major,added_desc->minor));
-
-		adisk->operational = 0;
-		adisk->write_only = 0;
-		adisk->spare = 1;
-		adisk->used_slot = 1;
-
-
-		break;
-
-	default:
-		MD_BUG();	
-		err = 1;
-		goto abort;
 	}
-abort:
+	if (err)
+		MD_BUG();
 	spin_unlock_irq(&conf->device_lock);
 	print_raid5_conf(conf);
 	return err;
@@ -1977,7 +1943,11 @@ static mdk_personality_t raid5_personality=
 	stop:		stop,
 	status:		status,
 	error_handler:	error,
-	diskop:		diskop,
+	hot_add_disk:	raid5_add_disk,
+	hot_remove_disk:raid5_remove_disk,
+	spare_write:	raid5_spare_write,
+	spare_inactive:	raid5_spare_inactive,
+	spare_active:	raid5_spare_active,
 	sync_request:	sync_request
 };
 
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 3137aa78fea6..2c5ef516b1c5 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -13,14 +13,9 @@
  *  Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl}
  */
 
-#include <linux/config.h>
+#include <linux/init.h>
 #include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/kernel.h>
-#include <linux/major.h>
 #include <linux/blk.h>
-#include <linux/init.h>
-#include <linux/raid/md.h>
 #include <linux/buffer_head.h>	/* for invalidate_bdev() */
 
 #include "check.h"
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 69282d99a9eb..7b02c3c82a33 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -160,16 +160,6 @@ struct mdk_rdev_s
 	int desc_nr;			/* descriptor index in the superblock */
 };
 
-
-/*
- * disk operations in a working array:
- */
-#define DISKOP_SPARE_INACTIVE	0
-#define DISKOP_SPARE_WRITE	1
-#define DISKOP_SPARE_ACTIVE	2
-#define DISKOP_HOT_REMOVE_DISK	3
-#define DISKOP_HOT_ADD_DISK	4
-
 typedef struct mdk_personality_s mdk_personality_t;
 
 struct mddev_s
@@ -214,18 +204,11 @@ struct mdk_personality_s
 	int (*stop)(mddev_t *mddev);
 	int (*status)(char *page, mddev_t *mddev);
 	int (*error_handler)(mddev_t *mddev, struct block_device *bdev);
-
-/*
- * Some personalities (RAID-1, RAID-5) can have disks hot-added and
- * hot-removed. Hot removal is different from failure. (failure marks
- * a disk inactive, but the disk is still part of the array) The interface
- * to such operations is the 'pers->diskop()' function, can be NULL.
- *
- * the diskop function can change the pointer pointing to the incoming
- * descriptor, but must do so very carefully. (currently only
- * SPARE_ACTIVE expects such a change)
- */
-	int (*diskop) (mddev_t *mddev, mdp_disk_t **descriptor, int state);
+	int (*hot_add_disk) (mddev_t *mddev, mdp_disk_t *descriptor, mdk_rdev_t *rdev);
+	int (*hot_remove_disk) (mddev_t *mddev, int number);
+	int (*spare_write) (mddev_t *mddev, int number);
+	int (*spare_inactive) (mddev_t *mddev);
+	int (*spare_active) (mddev_t *mddev, mdp_disk_t **descriptor);
 	int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster);
 };
 
-- 
cgit v1.2.3


From 881c3bc1a70c9aaf69d0b0ca115b706ba7ededc3 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Thu, 4 Jul 2002 08:53:44 -0700
Subject: [PATCH] raid kdev_t cleanups - part 2

	* a bunch of callers of partition_name() now call
bdev_partition_name() instead;
	* the last users of raid1 and multipath ->dev are gone; so are
the fields in question.
---
 drivers/md/multipath.c         | 15 ++++++---------
 drivers/md/raid1.c             | 22 ++++++++--------------
 drivers/md/raid5.c             | 21 +++++++++------------
 include/linux/raid/md.h        |  2 +-
 include/linux/raid/multipath.h |  1 -
 include/linux/raid/raid1.h     |  1 -
 6 files changed, 24 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 97e6fc1d52de..ac08a9a90611 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -312,7 +312,7 @@ static void mark_disk_bad (mddev_t *mddev, int failed)
 	mddev->sb_dirty = 1;
 	md_wakeup_thread(conf->thread);
 	conf->working_disks--;
-	printk (DISK_FAILED, partition_name (multipath->dev),
+	printk (DISK_FAILED, bdev_partition_name (multipath->bdev),
 				 conf->working_disks);
 }
 
@@ -405,7 +405,7 @@ static void print_multipath_conf (multipath_conf_t *conf)
 			printk(" disk%d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n",
 				i, tmp->spare,tmp->operational,
 				tmp->number,tmp->raid_disk,tmp->used_slot,
-				partition_name(tmp->dev));
+				bdev_partition_name(tmp->bdev));
 	}
 }
 
@@ -594,7 +594,6 @@ static int multipath_add_disk(mddev_t *mddev, mdp_disk_t *added_desc,
 				break;
 			p->number = added_desc->number;
 			p->raid_disk = added_desc->raid_disk;
-			p->dev = rdev->dev;
 			p->bdev = rdev->bdev;
 			p->operational = 0;
 			p->spare = 1;
@@ -631,7 +630,6 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
 			}
 			if (p->spare && i < conf->raid_disks)
 				break;
-			p->dev = NODEV;
 			p->bdev = NULL;
 			p->used_slot = 0;
 			conf->nr_disks--;
@@ -853,7 +851,7 @@ static int multipath_run (mddev_t *mddev)
 		if (rdev->faulty) {
 			/* this is a "should never happen" case and if it */
 			/* ever does happen, a continue; won't help */
-			printk(ERRORS, partition_name(rdev->dev));
+			printk(ERRORS, bdev_partition_name(rdev->bdev));
 			continue;
 		} else {
 			/* this is a "should never happen" case and if it */
@@ -873,7 +871,7 @@ static int multipath_run (mddev_t *mddev)
 		disk = conf->multipaths + disk_idx;
 
 		if (!disk_sync(desc))
-			printk(NOT_IN_SYNC, partition_name(rdev->dev));
+			printk(NOT_IN_SYNC, bdev_partition_name(rdev->bdev));
 
 		/*
 		 * Mark all disks as spare to start with, then pick our
@@ -882,7 +880,6 @@ static int multipath_run (mddev_t *mddev)
 		 */
 		disk->number = desc->number;
 		disk->raid_disk = desc->raid_disk;
-		disk->dev = rdev->dev;
 		disk->bdev = rdev->bdev;
 		atomic_inc(&rdev->bdev->bd_count);
 		disk->operational = 0;
@@ -892,7 +889,7 @@ static int multipath_run (mddev_t *mddev)
 
 		if (disk_active(desc)) {
 			if(!conf->working_disks) {
-				printk(OPERATIONAL, partition_name(rdev->dev),
+				printk(OPERATIONAL, bdev_partition_name(rdev->bdev),
  					desc->raid_disk);
 				disk->operational = 1;
 				disk->spare = 0;
@@ -909,7 +906,7 @@ static int multipath_run (mddev_t *mddev)
 	if(!conf->working_disks && num_rdevs) {
 		desc = &sb->disks[def_rdev->desc_nr];
 		disk = conf->multipaths + desc->raid_disk;
-		printk(OPERATIONAL, partition_name(def_rdev->dev),
+		printk(OPERATIONAL, bdev_partition_name(def_rdev->bdev),
 			disk->raid_disk);
 		disk->operational = 1;
 		disk->spare = 0;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 76e2de202458..088993cf5165 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -589,7 +589,7 @@ static void mark_disk_bad(mddev_t *mddev, int failed)
 	md_wakeup_thread(conf->thread);
 	if (!mirror->write_only)
 		conf->working_disks--;
-	printk(DISK_FAILED, partition_name(mirror->dev), conf->working_disks);
+	printk(DISK_FAILED, bdev_partition_name(mirror->bdev), conf->working_disks);
 }
 
 static int error(mddev_t *mddev, struct block_device *bdev)
@@ -640,7 +640,7 @@ static void print_conf(conf_t *conf)
 		printk(" disk %d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n",
 			i, tmp->spare, tmp->operational,
 			tmp->number, tmp->raid_disk, tmp->used_slot,
-			partition_name(tmp->dev));
+			bdev_partition_name(tmp->bdev));
 	}
 }
 
@@ -848,7 +848,6 @@ static int raid1_add_disk(mddev_t *mddev, mdp_disk_t *added_desc,
 				break;
 			p->number = added_desc->number;
 			p->raid_disk = added_desc->raid_disk;
-			p->dev = rdev->dev;
 			/* it will be held open by rdev */
 			p->bdev = rdev->bdev;
 			p->operational = 0;
@@ -886,7 +885,6 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
 			}
 			if (p->spare && (i < conf->raid_disks))
 				break;
-			p->dev = NODEV;
 			p->bdev = NULL;
 			p->used_slot = 0;
 			conf->nr_disks--;
@@ -1284,7 +1282,7 @@ static int run(mddev_t *mddev)
 
 	ITERATE_RDEV(mddev, rdev, tmp) {
 		if (rdev->faulty) {
-			printk(ERRORS, partition_name(rdev->dev));
+			printk(ERRORS, bdev_partition_name(rdev->bdev));
 		} else {
 			if (!rdev->sb) {
 				MD_BUG();
@@ -1302,7 +1300,6 @@ static int run(mddev_t *mddev)
 		if (disk_faulty(descriptor)) {
 			disk->number = descriptor->number;
 			disk->raid_disk = disk_idx;
-			disk->dev = rdev->dev;
 			disk->bdev = rdev->bdev;
 			atomic_inc(&rdev->bdev->bd_count);
 			disk->operational = 0;
@@ -1315,27 +1312,26 @@ static int run(mddev_t *mddev)
 		if (disk_active(descriptor)) {
 			if (!disk_sync(descriptor)) {
 				printk(NOT_IN_SYNC,
-					partition_name(rdev->dev));
+					bdev_partition_name(rdev->bdev));
 				continue;
 			}
 			if ((descriptor->number > MD_SB_DISKS) ||
 					(disk_idx > sb->raid_disks)) {
 
 				printk(INCONSISTENT,
-					partition_name(rdev->dev));
+					bdev_partition_name(rdev->bdev));
 				continue;
 			}
 			if (disk->operational) {
 				printk(ALREADY_RUNNING,
-					partition_name(rdev->dev),
+					bdev_partition_name(rdev->bdev),
 					disk_idx);
 				continue;
 			}
-			printk(OPERATIONAL, partition_name(rdev->dev),
+			printk(OPERATIONAL, bdev_partition_name(rdev->bdev),
 					disk_idx);
 			disk->number = descriptor->number;
 			disk->raid_disk = disk_idx;
-			disk->dev = rdev->dev;
 			disk->bdev = rdev->bdev;
 			atomic_inc(&rdev->bdev->bd_count);
 			disk->operational = 1;
@@ -1348,10 +1344,9 @@ static int run(mddev_t *mddev)
 		/*
 		 * Must be a spare disk ..
 		 */
-			printk(SPARE, partition_name(rdev->dev));
+			printk(SPARE, bdev_partition_name(rdev->bdev));
 			disk->number = descriptor->number;
 			disk->raid_disk = disk_idx;
-			disk->dev = rdev->dev;
 			disk->bdev = rdev->bdev;
 			atomic_inc(&rdev->bdev->bd_count);
 			disk->operational = 0;
@@ -1385,7 +1380,6 @@ static int run(mddev_t *mddev)
 				!disk->used_slot) {
 			disk->number = descriptor->number;
 			disk->raid_disk = disk_idx;
-			disk->dev = NODEV;
 			disk->bdev = NULL;
 			disk->operational = 0;
 			disk->write_only = 0;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 5218dfabbc01..5d0af20344cc 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -441,7 +441,6 @@ static void raid5_build_block (struct stripe_head *sh, int i)
 
 static int error(mddev_t *mddev, struct block_device *bdev)
 {
-	kdev_t dev = to_kdev_t(bdev->bd_dev);
 	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
 	mdp_super_t *sb = mddev->sb;
 	struct disk_info *disk;
@@ -467,7 +466,7 @@ static int error(mddev_t *mddev, struct block_device *bdev)
 			printk (KERN_ALERT
 				"raid5: Disk failure on %s, disabling device."
 				" Operation continuing on %d devices\n",
-				partition_name (dev), conf->working_disks);
+				bdev_partition_name(bdev), conf->working_disks);
 		}
 		return 0;
 	}
@@ -479,7 +478,7 @@ static int error(mddev_t *mddev, struct block_device *bdev)
 		if (disk->bdev == bdev) {
 			printk (KERN_ALERT
 				"raid5: Disk failure on spare %s\n",
-				partition_name (dev));
+				bdev_partition_name (bdev));
 			if (!conf->spare->operational) {
 				/* probably a SET_DISK_FAULTY ioctl */
 				return -EIO;
@@ -1429,7 +1428,7 @@ static int run (mddev_t *mddev)
 		disk = conf->disks + raid_disk;
 
 		if (disk_faulty(desc)) {
-			printk(KERN_ERR "raid5: disabled device %s (errors detected)\n", partition_name(rdev->dev));
+			printk(KERN_ERR "raid5: disabled device %s (errors detected)\n", bdev_partition_name(rdev->bdev));
 			if (!rdev->faulty) {
 				MD_BUG();
 				goto abort;
@@ -1446,19 +1445,19 @@ static int run (mddev_t *mddev)
 		}
 		if (disk_active(desc)) {
 			if (!disk_sync(desc)) {
-				printk(KERN_ERR "raid5: disabled device %s (not in sync)\n", partition_name(rdev->dev));
+				printk(KERN_ERR "raid5: disabled device %s (not in sync)\n", bdev_partition_name(rdev->bdev));
 				MD_BUG();
 				goto abort;
 			}
 			if (raid_disk > sb->raid_disks) {
-				printk(KERN_ERR "raid5: disabled device %s (inconsistent descriptor)\n", partition_name(rdev->dev));
+				printk(KERN_ERR "raid5: disabled device %s (inconsistent descriptor)\n", bdev_partition_name(rdev->bdev));
 				continue;
 			}
 			if (disk->operational) {
-				printk(KERN_ERR "raid5: disabled device %s (device %d already operational)\n", partition_name(rdev->dev), raid_disk);
+				printk(KERN_ERR "raid5: disabled device %s (device %d already operational)\n", bdev_partition_name(rdev->bdev), raid_disk);
 				continue;
 			}
-			printk(KERN_INFO "raid5: device %s operational as raid disk %d\n", partition_name(rdev->dev), raid_disk);
+			printk(KERN_INFO "raid5: device %s operational as raid disk %d\n", bdev_partition_name(rdev->bdev), raid_disk);
 	
 			disk->number = desc->number;
 			disk->raid_disk = raid_disk;
@@ -1471,7 +1470,7 @@ static int run (mddev_t *mddev)
 			/*
 			 * Must be a spare disk ..
 			 */
-			printk(KERN_INFO "raid5: spare disk %s\n", partition_name(rdev->dev));
+			printk(KERN_INFO "raid5: spare disk %s\n", bdev_partition_name(rdev->bdev));
 			disk->number = desc->number;
 			disk->raid_disk = raid_disk;
 			disk->bdev = rdev->bdev;
@@ -1688,9 +1687,7 @@ static void print_raid5_conf (raid5_conf_t *conf)
 		printk(" disk %d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n",
 			i, tmp->spare,tmp->operational,
 			tmp->number,tmp->raid_disk,tmp->used_slot,
-			partition_name(tmp->bdev ?
-					to_kdev_t(tmp->bdev->bd_dev):
-					NODEV));
+			bdev_partition_name(tmp->bdev));
 	}
 }
 
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index 6dfe08ccfbfe..280092c6e95b 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -66,7 +66,7 @@ extern struct hd_struct md_hd_struct[MAX_MD_DEVS];
 extern char * partition_name (kdev_t dev);
 extern inline char * bdev_partition_name (struct block_device *bdev)
 {
-	return partition_name(to_kdev_t(bdev->bd_dev));
+	return partition_name(bdev ? to_kdev_t(bdev->bd_dev) : NODEV);
 }
 extern int register_md_personality (int p_num, mdk_personality_t *p);
 extern int unregister_md_personality (int p_num);
diff --git a/include/linux/raid/multipath.h b/include/linux/raid/multipath.h
index 9c9cdc77fa05..e4f3e6189b7b 100644
--- a/include/linux/raid/multipath.h
+++ b/include/linux/raid/multipath.h
@@ -6,7 +6,6 @@
 struct multipath_info {
 	int		number;
 	int		raid_disk;
-	kdev_t		dev;
 	struct block_device *bdev;
 
 	/*
diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h
index f9f02ab19bd3..f63d68e55c11 100644
--- a/include/linux/raid/raid1.h
+++ b/include/linux/raid/raid1.h
@@ -8,7 +8,6 @@ typedef struct mirror_info mirror_info_t;
 struct mirror_info {
 	int		number;
 	int		raid_disk;
-	kdev_t		dev;
 	struct block_device *bdev;
 	sector_t	head_position;
 	atomic_t	nr_pending;
-- 
cgit v1.2.3


From dc5d0e468b264f2c36789ffd5fe5551cd72f387d Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Thu, 4 Jul 2002 08:53:54 -0700
Subject: [PATCH] raid kdev_t cleanups - part 3

	* ->dev killed for md/linear.c (same as previous parts)
---
 drivers/md/linear.c         | 7 +++----
 include/linux/raid/linear.h | 1 -
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index d8f29104dacf..daeeb075182c 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -60,7 +60,6 @@ static int linear_run (mddev_t *mddev)
 			goto out;
 		}
 
-		disk->dev = rdev->dev;
 		disk->bdev = rdev->bdev;
 		atomic_inc(&rdev->bdev->bd_count);
 		disk->size = rdev->size;
@@ -163,7 +162,7 @@ static int linear_make_request (request_queue_t *q, struct bio *bio)
     
 	if (block >= (tmp_dev->size + tmp_dev->offset)
 				|| block < tmp_dev->offset) {
-		printk ("linear_make_request: Block %ld out of bounds on dev %s size %ld offset %ld\n", block, kdevname(tmp_dev->dev), tmp_dev->size, tmp_dev->offset);
+		printk ("linear_make_request: Block %ld out of bounds on dev %s size %ld offset %ld\n", block, bdevname(tmp_dev->bdev), tmp_dev->size, tmp_dev->offset);
 		bio_io_error(bio);
 		return 0;
 	}
@@ -186,11 +185,11 @@ static int linear_status (char *page, mddev_t *mddev)
 	for (j = 0; j < conf->nr_zones; j++)
 	{
 		sz += sprintf(page+sz, "[%s",
-			partition_name(conf->hash_table[j].dev0->dev));
+			bdev_partition_name(conf->hash_table[j].dev0->bdev));
 
 		if (conf->hash_table[j].dev1)
 			sz += sprintf(page+sz, "/%s] ",
-			  partition_name(conf->hash_table[j].dev1->dev));
+			  bdev_partition_name(conf->hash_table[j].dev1->bdev));
 		else
 			sz += sprintf(page+sz, "] ");
 	}
diff --git a/include/linux/raid/linear.h b/include/linux/raid/linear.h
index 56e7c06e1086..dfb588a60ea6 100644
--- a/include/linux/raid/linear.h
+++ b/include/linux/raid/linear.h
@@ -4,7 +4,6 @@
 #include <linux/raid/md.h>
 
 struct dev_info {
-	kdev_t		dev;
 	struct block_device *bdev;
 	unsigned long	size;
 	unsigned long	offset;
-- 
cgit v1.2.3


From ab6a5810c0caab37af2b32016d45d48af8af57aa Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Thu, 4 Jul 2002 08:53:58 -0700
Subject: [PATCH] ex_dev switched to dev_t

	* svc_export ->ex_dev turned into dev_t.  It's a pure search
key and all places that set it actually do to_kdev_t(some_dev_t_expression).
---
 fs/nfsd/export.c            | 17 ++++++++---------
 fs/nfsd/nfsfh.c             | 12 ++++++------
 include/linux/nfsd/export.h |  4 ++--
 include/linux/nfsd/nfsfh.h  |  4 ++--
 4 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index e974fe17805f..6f72f241ea9b 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -68,17 +68,17 @@ static svc_client *		clients;
  * Find the client's export entry matching xdev/xino.
  */
 svc_export *
-exp_get(svc_client *clp, kdev_t dev, ino_t ino)
+exp_get(svc_client *clp, dev_t dev, ino_t ino)
 {
 	struct list_head *head, *p;
 	
 	if (!clp)
 		return NULL;
 
-	head = &clp->cl_export[EXPORT_HASH(kdev_t_to_nr(dev))];
+	head = &clp->cl_export[EXPORT_HASH(dev)];
 	list_for_each(p, head) {
 		svc_export *exp = list_entry(p, svc_export, ex_hash);
-		if (exp->ex_ino == ino && kdev_same(exp->ex_dev, dev))
+		if (exp->ex_ino == ino && exp->ex_dev ==  dev)
 			return exp;
 	}
 	return NULL;
@@ -250,7 +250,7 @@ exp_export(struct nfsctl_export *nxp)
 	struct nameidata nd;
 	struct inode	*inode = NULL;
 	int		err;
-	kdev_t		dev;
+	dev_t		dev;
 	ino_t		ino;
 
 	/* Consistency check */
@@ -276,7 +276,7 @@ exp_export(struct nfsctl_export *nxp)
 	if (err)
 		goto out_unlock;
 	inode = nd.dentry->d_inode;
-	dev = inode->i_dev;
+	dev = inode->i_sb->s_dev;
 	ino = inode->i_ino;
 	err = -EINVAL;
 
@@ -364,7 +364,7 @@ exp_export(struct nfsctl_export *nxp)
 	if (parent)
 		exp_change_parents(clp, parent, exp);
 
-	list_add(&exp->ex_hash, clp->cl_export + EXPORT_HASH(kdev_t_to_nr(dev)));
+	list_add(&exp->ex_hash, clp->cl_export + EXPORT_HASH(dev));
 	list_add_tail(&exp->ex_list, &clp->cl_list);
 
 	exp_fsid_hash(clp, exp);
@@ -398,7 +398,7 @@ exp_do_unexport(svc_export *unexp)
 	dentry = unexp->ex_dentry;
 	mnt = unexp->ex_mnt;
 	inode = dentry->d_inode;
-	if (!kdev_same(unexp->ex_dev, inode->i_dev) || unexp->ex_ino != inode->i_ino)
+	if (unexp->ex_dev != inode->i_sb->s_dev || unexp->ex_ino != inode->i_ino)
 		printk(KERN_WARNING "nfsd: bad dentry in unexport!\n");
 	dput(dentry);
 	mntput(mnt);
@@ -440,8 +440,7 @@ exp_unexport(struct nfsctl_export *nxp)
 	err = -EINVAL;
 	clp = exp_getclientbyname(nxp->ex_client);
 	if (clp) {
-		kdev_t ex_dev = to_kdev_t(nxp->ex_dev);
-		svc_export *exp = exp_get(clp, ex_dev, nxp->ex_ino);
+		svc_export *exp = exp_get(clp, nxp->ex_dev, nxp->ex_ino);
 		if (exp) {
 			exp_do_unexport(exp);
 			err = 0;
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 4b889c888fbb..f0f88f19164c 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -97,7 +97,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 	rqstp->rq_reffh = fh;
 
 	if (!fhp->fh_dentry) {
-		kdev_t xdev = NODEV;
+		dev_t xdev = 0;
 		ino_t xino = 0;
 		__u32 *datap=NULL;
 		__u32 tfh[3];		/* filehandle fragment for oldstyle filehandles */
@@ -122,7 +122,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 			case 0:
 				if ((data_left-=2)<0) goto out;
 				nfsdev = ntohl(*datap++);
-				xdev = mk_kdev(nfsdev>>16, nfsdev&0xFFFF);
+				xdev = MKDEV(nfsdev>>16, nfsdev&0xFFFF);
 				xino = *datap++;
 				break;
 			case 1:
@@ -136,7 +136,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 			if (fh->fh_size != NFS_FHSIZE)
 				goto out;
 			/* assume old filehandle format */
-			xdev = u32_to_kdev_t(fh->ofh_xdev);
+			xdev = u32_to_dev_t(fh->ofh_xdev);
 			xino = u32_to_ino_t(fh->ofh_xino);
 		}
 
@@ -308,7 +308,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, st
 	__u32 *datap;
 
 	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
-		major(exp->ex_dev), minor(exp->ex_dev), (long) exp->ex_ino,
+		MAJOR(exp->ex_dev), MINOR(exp->ex_dev), (long) exp->ex_ino,
 		parent->d_name.name, dentry->d_name.name,
 		(inode ? inode->i_ino : 0));
 
@@ -329,7 +329,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, st
 		memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
 		fhp->fh_handle.fh_size = NFS_FHSIZE;
 		fhp->fh_handle.ofh_dcookie = 0xfeebbaca;
-		fhp->fh_handle.ofh_dev =  htonl((major(exp->ex_dev)<<16)| minor(exp->ex_dev));
+		fhp->fh_handle.ofh_dev =  htonl((MAJOR(exp->ex_dev)<<16)| MINOR(exp->ex_dev));
 		fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev;
 		fhp->fh_handle.ofh_xino = ino_t_to_u32(exp->ex_ino);
 		fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry));
@@ -348,7 +348,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, st
 		} else {
 			fhp->fh_handle.fh_fsid_type = 0;
 			/* fsid_type 0 == 2byte major, 2byte minor, 4byte inode */
-			*datap++ = htonl((major(exp->ex_dev)<<16)| minor(exp->ex_dev));
+			*datap++ = htonl((MAJOR(exp->ex_dev)<<16)| MINOR(exp->ex_dev));
 			*datap++ = ino_t_to_u32(exp->ex_ino);
 			fhp->fh_handle.fh_size = 3*4;
 		}
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index ab7b149a3f84..9de10bf92e32 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -70,7 +70,7 @@ struct svc_export {
 	int			ex_flags;
 	struct vfsmount *	ex_mnt;
 	struct dentry *		ex_dentry;
-	kdev_t			ex_dev;
+	dev_t			ex_dev;
 	ino_t			ex_ino;
 	uid_t			ex_anon_uid;
 	gid_t			ex_anon_gid;
@@ -94,7 +94,7 @@ void			exp_readlock(void);
 void			exp_readunlock(void);
 struct svc_client *	exp_getclient(struct sockaddr_in *sin);
 void			exp_putclient(struct svc_client *clp);
-struct svc_export *	exp_get(struct svc_client *clp, kdev_t dev, ino_t ino);
+struct svc_export *	exp_get(struct svc_client *clp, dev_t dev, ino_t ino);
 struct svc_export *	exp_get_fsid(struct svc_client *clp, int fsid);
 struct svc_export *	exp_get_by_name(struct svc_client *clp,
 					struct vfsmount *mnt,
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index 84c72958f99b..53f55cc17a50 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -143,13 +143,13 @@ static inline __u32 kdev_t_to_u32(kdev_t dev)
 	return udev;
 }
 
-static inline kdev_t u32_to_kdev_t(__u32 udev)
+static inline dev_t u32_to_dev_t(__u32 udev)
 {
 	unsigned int minor, major;
 
 	minor = (udev & 0xff) | ((udev >> 8) & 0xfff00);
 	major = ((udev >> 8) & 0xff) | ((udev >> 20) & 0xf00);
-	return mk_kdev(major, minor);
+	return MKDEV(major, minor);
 }
 
 static inline __u32 ino_t_to_u32(ino_t ino)
-- 
cgit v1.2.3


From c9add9b811a80322afe4388aa012ff431d41e0cf Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Thu, 4 Jul 2002 08:54:03 -0700
Subject: [PATCH] assorted kdev_t cleanups in filesystems

	* JFS uses its ->logdev only twice - one place assigns it
to_kdev_t(le32_to_cpu(...)), the other takes kdev_t_to_nr() of it.
Switched to u32 - it's just a place where we store the device number we got
from the superblock.
	* several reiserfs_fs.h function prototypes removed - functions
in question don't exist anymore.
	* smbfs doesn't support device nodes; ->f_rdev removed.
---
 fs/jfs/jfs_incore.h            | 2 +-
 fs/jfs/jfs_logmgr.c            | 2 +-
 fs/jfs/jfs_mount.c             | 2 +-
 fs/smbfs/inode.c               | 2 --
 include/linux/reiserfs_fs.h    | 3 ---
 include/linux/reiserfs_fs_sb.h | 2 +-
 include/linux/smb.h            | 2 --
 7 files changed, 4 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index c54907dddbe5..71256e2322cd 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -122,7 +122,7 @@ struct jfs_sb_info {
 	short		nbperpage;	/* 2: blocks per page		*/
 	short		l2nbperpage;	/* 2: log2 blocks per page	*/
 	short		l2niperblk;	/* 2: log2 inodes per page	*/
-	kdev_t		logdev;		/* 2: external log device	*/
+	u32		logdev;		/* 2: external log device	*/
 	uint		aggregate;	/* volume identifier in log record */
 	pxd_t		logpxd;		/* 8: pxd describing log	*/
 	pxd_t		ait2;		/* 8: pxd describing AIT copy	*/
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 1bd50e77c4a5..68e08f8b6004 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1102,7 +1102,7 @@ int lmLogOpen(struct super_block *sb, log_t ** logptr)
 	 */
       externalLog:
 
-	if (!(bdev = bdget(kdev_t_to_nr(JFS_SBI(sb)->logdev)))) {
+	if (!(bdev = bdget(JFS_SBI(sb)->logdev))) {
 		rc = ENODEV;
 		goto free;
 	}
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index afab3ab7ed5f..d8a7773fa9ff 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -406,7 +406,7 @@ static int chkSuper(struct super_block *sb)
 	if (sbi->mntflag & JFS_INLINELOG)
 		sbi->logpxd = j_sb->s_logpxd;
 	else {
-		sbi->logdev = to_kdev_t(le32_to_cpu(j_sb->s_logdev));
+		sbi->logdev = le32_to_cpu(j_sb->s_logdev);
 		memcpy(sbi->uuid, j_sb->s_uuid, sizeof(sbi->uuid));
 		memcpy(sbi->loguuid, j_sb->s_loguuid, sizeof(sbi->uuid));
 	}
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index c0b5997f0b7d..0d439b0b1508 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -145,7 +145,6 @@ smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr)
 	fattr->f_ino	= inode->i_ino;
 	fattr->f_uid	= inode->i_uid;
 	fattr->f_gid	= inode->i_gid;
-	fattr->f_rdev	= inode->i_rdev;
 	fattr->f_size	= inode->i_size;
 	fattr->f_mtime	= inode->i_mtime;
 	fattr->f_ctime	= inode->i_ctime;
@@ -183,7 +182,6 @@ smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr)
 	inode->i_nlink	= fattr->f_nlink;
 	inode->i_uid	= fattr->f_uid;
 	inode->i_gid	= fattr->f_gid;
-	inode->i_rdev	= fattr->f_rdev;
 	inode->i_ctime	= fattr->f_ctime;
 	inode->i_blksize= fattr->f_blksize;
 	inode->i_blocks = fattr->f_blocks;
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 29f6063b3546..d8b3fa0a7ba0 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -1675,13 +1675,10 @@ int pop_journal_writer(int windex) ;
 int journal_transaction_should_end(struct reiserfs_transaction_handle *, int) ;
 int reiserfs_in_journal(struct super_block *p_s_sb, unsigned long bl, int searchall, unsigned long *next) ;
 int journal_begin(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ;
-struct super_block *reiserfs_get_super(kdev_t dev) ;
 void flush_async_commits(struct super_block *p_s_sb) ;
 
 int buffer_journaled(const struct buffer_head *bh) ;
 int mark_buffer_journal_new(struct buffer_head *bh) ;
-int reiserfs_sync_all_buffers(kdev_t dev, int wait) ;
-int reiserfs_sync_buffers(kdev_t dev, int wait) ;
 int reiserfs_add_page_to_flush_list(struct reiserfs_transaction_handle *,
                                     struct inode *, struct buffer_head *) ;
 int reiserfs_remove_page_from_flush_list(struct reiserfs_transaction_handle *,
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 04ec2e907f90..534d8e20bdb7 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -160,7 +160,7 @@ struct reiserfs_transaction_handle {
   int t_blocks_allocated ;      /* number of blocks this writer allocated */
   unsigned long t_trans_id ;    /* sanity check, equals the current trans id */
   struct super_block *t_super ; /* super for this FS when journal_begin was 
-                                   called. saves calls to reiserfs_get_super */
+                                   called. */
 } ;
 
 /*
diff --git a/include/linux/smb.h b/include/linux/smb.h
index a9d88d856df5..5b8dce292377 100644
--- a/include/linux/smb.h
+++ b/include/linux/smb.h
@@ -10,7 +10,6 @@
 #define _LINUX_SMB_H
 
 #include <linux/types.h>
-#include <linux/kdev_t.h>
 
 enum smb_protocol { 
 	SMB_PROTOCOL_NONE, 
@@ -85,7 +84,6 @@ struct smb_fattr {
 	nlink_t		f_nlink;
 	uid_t		f_uid;
 	gid_t		f_gid;
-	kdev_t		f_rdev;
 	loff_t		f_size;
 	time_t		f_atime;
 	time_t		f_mtime;
-- 
cgit v1.2.3


From 88cc0d3e8fc80880372e6900712a6f3f8172286a Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Thu, 4 Jul 2002 08:54:08 -0700
Subject: [PATCH] ->i_dev switched to dev_t

	* ->i_dev followed the example of ->s_dev - it's dev_t now.  All
remaining uses of ->i_dev either outright want dev_t (stat()) or couldn't
care less (printing major:minor in /proc/<pid>/maps, etc.)
---
 arch/sparc/kernel/signal.c     |  6 +++---
 arch/sparc64/kernel/signal.c   |  6 +++---
 arch/sparc64/kernel/signal32.c |  6 +++---
 fs/block_dev.c                 |  1 -
 fs/inode.c                     |  2 +-
 fs/locks.c                     |  5 ++++-
 fs/nfsd/nfs3xdr.c              |  2 +-
 fs/nfsd/vfs.c                  | 12 ++++++------
 fs/proc/array.c                | 10 +++++-----
 fs/stat.c                      |  2 +-
 include/linux/fs.h             |  2 +-
 include/linux/kdev_t.h         |  3 +--
 net/socket.c                   |  2 +-
 13 files changed, 30 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc/kernel/signal.c b/arch/sparc/kernel/signal.c
index 743a2eed2cd5..7dfb666b685a 100644
--- a/arch/sparc/kernel/signal.c
+++ b/arch/sparc/kernel/signal.c
@@ -1090,7 +1090,7 @@ static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs,
 
 #ifdef DEBUG_SIGNALS_MAPS
 
-#define MAPS_LINE_FORMAT	  "%08lx-%08lx %s %08lx %s %lu "
+#define MAPS_LINE_FORMAT	  "%08lx-%08lx %s %08lx %02x:%02x %lu "
 
 static inline void read_maps (void)
 {
@@ -1107,7 +1107,7 @@ static inline void read_maps (void)
 		char *line;
 		char str[5], *cp = str;
 		int flags;
-		kdev_t dev;
+		dev_t dev;
 		unsigned long ino;
 
 		/*
@@ -1132,7 +1132,7 @@ static inline void read_maps (void)
 				      buffer, PAGE_SIZE);
 		}
 		printk(MAPS_LINE_FORMAT, map->vm_start, map->vm_end, str, map->vm_pgoff << PAGE_SHIFT,
-			      kdevname(dev), ino);
+			      MAJOR(dev), MINOR(dev), ino);
 		if (map->vm_file != NULL)
 			printk("%s\n", line);
 		else
diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c
index 3edaef15cc9e..9cc20dc19033 100644
--- a/arch/sparc64/kernel/signal.c
+++ b/arch/sparc64/kernel/signal.c
@@ -633,7 +633,7 @@ static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs,
 
 #ifdef DEBUG_SIGNALS_MAPS
 
-#define MAPS_LINE_FORMAT	  "%016lx-%016lx %s %016lx %s %lu "
+#define MAPS_LINE_FORMAT	  "%016lx-%016lx %s %016lx %02x:%02x %lu "
 
 static inline void read_maps (void)
 {
@@ -650,7 +650,7 @@ static inline void read_maps (void)
 		char *line;
 		char str[5], *cp = str;
 		int flags;
-		kdev_t dev;
+		dev_t dev;
 		unsigned long ino;
 
 		/*
@@ -675,7 +675,7 @@ static inline void read_maps (void)
 				      buffer, PAGE_SIZE);
 		}
 		printk(MAPS_LINE_FORMAT, map->vm_start, map->vm_end, str, map->vm_pgoff << PAGE_SHIFT,
-			      kdevname(dev), ino);
+			      MAJOR(dev), MINOR(dev), ino);
 		if (map->vm_file != NULL)
 			printk("%s\n", line);
 		else
diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c
index 4a49af3a33ba..47246310aa15 100644
--- a/arch/sparc64/kernel/signal32.c
+++ b/arch/sparc64/kernel/signal32.c
@@ -1319,7 +1319,7 @@ static inline void syscall_restart32(unsigned long orig_i0, struct pt_regs *regs
 
 #ifdef DEBUG_SIGNALS_MAPS
 
-#define MAPS_LINE_FORMAT	  "%016lx-%016lx %s %016lx %s %lu "
+#define MAPS_LINE_FORMAT	  "%016lx-%016lx %s %016lx %02x:%02x %lu "
 
 static inline void read_maps (void)
 {
@@ -1336,7 +1336,7 @@ static inline void read_maps (void)
 		char *line;
 		char str[5], *cp = str;
 		int flags;
-		kdev_t dev;
+		dev_t dev;
 		unsigned long ino;
 
 		/*
@@ -1361,7 +1361,7 @@ static inline void read_maps (void)
 				      buffer, PAGE_SIZE);
 		}
 		printk(MAPS_LINE_FORMAT, map->vm_start, map->vm_end, str, map->vm_pgoff << PAGE_SHIFT,
-			      kdevname(dev), ino);
+			      MAJOR(dev), MINOR(dev), ino);
 		if (map->vm_file != NULL)
 			printk("%s\n", line);
 		else
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 34f88ef66b0f..3ee4d14491bc 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -299,7 +299,6 @@ struct block_device *bdget(dev_t dev)
 			new_bdev->bd_inode = inode;
 			inode->i_mode = S_IFBLK;
 			inode->i_rdev = kdev;
-			inode->i_dev = kdev;
 			inode->i_bdev = new_bdev;
 			inode->i_data.a_ops = &def_blk_aops;
 			inode->i_data.gfp_mask = GFP_USER;
diff --git a/fs/inode.c b/fs/inode.c
index a3b2cd4e8a3c..d0e6bda9772a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -101,7 +101,7 @@ static struct inode *alloc_inode(struct super_block *sb)
 		struct address_space * const mapping = &inode->i_data;
 
 		inode->i_sb = sb;
-		inode->i_dev = to_kdev_t(sb->s_dev);
+		inode->i_dev = sb->s_dev;
 		inode->i_blkbits = sb->s_blocksize_bits;
 		inode->i_flags = 0;
 		atomic_set(&inode->i_count, 1);
diff --git a/fs/locks.c b/fs/locks.c
index 1e58f91263c5..bb32d1516343 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1751,9 +1751,12 @@ static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
 			       ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ "
 			       : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ ");
 	}
+	/*
+	 *	NOTE: it should be inode->i_sb->s_id, not kdevname(...).
+	 */
 	out += sprintf(out, "%d %s:%ld ",
 		     fl->fl_pid,
-		     inode ? kdevname(inode->i_dev) : "<none>",
+		     inode ? kdevname(to_kdev_t(inode->i_dev)) : "<none>",
 		     inode ? inode->i_ino : 0);
 	out += sprintf(out, "%Ld ", fl->fl_start);
 	if (fl->fl_end == OFFSET_MAX)
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 89d1f99ef77c..71279e639ba3 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -219,7 +219,7 @@ encode_saved_post_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 	    && (fhp->fh_export->ex_flags & NFSEXP_FSID))
 		p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
 	else
-		p = xdr_encode_hyper(p, (u64) kdev_t_to_nr(inode->i_dev));
+		p = xdr_encode_hyper(p, (u64) inode->i_dev);
 	p = xdr_encode_hyper(p, (u64) inode->i_ino);
 	p = encode_time3(p, fhp->fh_post_atime);
 	p = encode_time3(p, fhp->fh_post_mtime);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 02419edd0250..56cd76ef5c57 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -66,7 +66,7 @@ struct raparms {
 	struct raparms		*p_next;
 	unsigned int		p_count;
 	ino_t			p_ino;
-	kdev_t			p_dev;
+	dev_t			p_dev;
 	struct file_ra_state	p_ra;
 };
 
@@ -527,14 +527,14 @@ nfsd_sync_dir(struct dentry *dp)
 static spinlock_t ra_lock = SPIN_LOCK_UNLOCKED;
 
 static inline struct raparms *
-nfsd_get_raparms(kdev_t dev, ino_t ino)
+nfsd_get_raparms(dev_t dev, ino_t ino)
 {
 	struct raparms	*ra, **rap, **frap = NULL;
 	int depth = 0;
 
 	spin_lock(&ra_lock);
 	for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) {
-		if (ra->p_ino == ino && kdev_same(ra->p_dev, dev))
+		if (ra->p_ino == ino && ra->p_dev == dev)
 			goto found;
 		depth++;
 		if (ra->p_count == 0)
@@ -691,8 +691,8 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
 	}
 
 	if (err >= 0 && stable) {
-		static unsigned long	last_ino;
-		static kdev_t		last_dev = NODEV;
+		static ino_t	last_ino;
+		static dev_t	last_dev = 0;
 
 		/*
 		 * Gathered writes: If another process is currently
@@ -708,7 +708,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
 		 */
 		if (EX_WGATHER(exp)) {
 			if (atomic_read(&inode->i_writecount) > 1
-			    || (last_ino == inode->i_ino && kdev_same(last_dev, inode->i_dev))) {
+			    || (last_ino == inode->i_ino && last_dev == inode->i_dev)) {
 				dprintk("nfsd: write defer %d\n", current->pid);
 				set_current_state(TASK_UNINTERRUPTIBLE);
 				schedule_timeout((HZ+99)/100);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ff74e5098ea7..218e20e62701 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -538,11 +538,11 @@ int proc_pid_statm(struct task_struct *task, char * buffer)
  *         + (index into the line)
  */
 /* for systems with sizeof(void*) == 4: */
-#define MAPS_LINE_FORMAT4	  "%08lx-%08lx %s %08lx %s %lu"
+#define MAPS_LINE_FORMAT4	  "%08lx-%08lx %s %08lx %02x:%02x %lu"
 #define MAPS_LINE_MAX4	49 /* sum of 8  1  8  1 4 1 8 1 5 1 10 1 */
 
 /* for systems with sizeof(void*) == 8: */
-#define MAPS_LINE_FORMAT8	  "%016lx-%016lx %s %016lx %s %lu"
+#define MAPS_LINE_FORMAT8	  "%016lx-%016lx %s %016lx %02x:%02x %lu"
 #define MAPS_LINE_MAX8	73 /* sum of 16  1  16  1 4 1 16 1 5 1 10 1 */
 
 #define MAPS_LINE_FORMAT	(sizeof(void*) == 4 ? MAPS_LINE_FORMAT4 : MAPS_LINE_FORMAT8)
@@ -554,7 +554,7 @@ static int proc_pid_maps_get_line (char *buf, struct vm_area_struct *map)
 	char *line;
 	char str[5];
 	int flags;
-	kdev_t dev;
+	dev_t dev;
 	unsigned long ino;
 	int len;
 
@@ -566,7 +566,7 @@ static int proc_pid_maps_get_line (char *buf, struct vm_area_struct *map)
 	str[3] = flags & VM_MAYSHARE ? 's' : 'p';
 	str[4] = 0;
 
-	dev = NODEV;
+	dev = 0;
 	ino = 0;
 	if (map->vm_file != NULL) {
 		dev = map->vm_file->f_dentry->d_inode->i_dev;
@@ -584,7 +584,7 @@ static int proc_pid_maps_get_line (char *buf, struct vm_area_struct *map)
 	len = sprintf(line,
 		      MAPS_LINE_FORMAT,
 		      map->vm_start, map->vm_end, str, map->vm_pgoff << PAGE_SHIFT,
-		      kdevname(dev), ino);
+		      MAJOR(dev), MINOR(dev), ino);
 
 	if(map->vm_file) {
 		int i;
diff --git a/fs/stat.c b/fs/stat.c
index 3f87e0e823ec..bce88512b2b5 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -17,7 +17,7 @@
 
 void generic_fillattr(struct inode *inode, struct kstat *stat)
 {
-	stat->dev = kdev_t_to_nr(inode->i_dev);
+	stat->dev = inode->i_dev;
 	stat->ino = inode->i_ino;
 	stat->mode = inode->i_mode;
 	stat->nlink = inode->i_nlink;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1292fc4474cc..647d2e20ce52 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -359,7 +359,7 @@ struct inode {
 	struct list_head	i_dentry;
 	unsigned long		i_ino;
 	atomic_t		i_count;
-	kdev_t			i_dev;
+	dev_t			i_dev;
 	umode_t			i_mode;
 	nlink_t			i_nlink;
 	uid_t			i_uid;
diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h
index 2c61fc5161b6..1721915141c7 100644
--- a/include/linux/kdev_t.h
+++ b/include/linux/kdev_t.h
@@ -36,8 +36,7 @@ typedef struct { unsigned short major, minor; } kdev_t;
 Admissible operations on an object of type kdev_t:
 - passing it along
 - comparing it for equality with another such object
-- storing it in inode->i_dev, inode->i_rdev, req->rq_dev, de->dc_dev,
-- tty->device
+- storing it in inode->i_rdev, req->rq_dev, de->dc_dev, tty->device
 - using its bit pattern as argument in a hash function
 - finding its major and minor
 - complaining about it
diff --git a/net/socket.c b/net/socket.c
index bed470afb5ed..798eeab2f47c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -465,7 +465,7 @@ struct socket *sock_alloc(void)
 	if (!inode)
 		return NULL;
 
-	inode->i_dev = NODEV;
+	inode->i_dev = 0;
 	sock = SOCKET_I(inode);
 
 	inode->i_mode = S_IFSOCK|S_IRWXUGO;
-- 
cgit v1.2.3


From 112b963110a2a07193f941a48bae39052f7a7ce8 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <greg@kroah.com>
Date: Fri, 5 Jul 2002 00:30:41 -0700
Subject: USB: removed file ops from usb device structure Moved the file ops
 and minor number stuff out of the usb structure, Now usb_register_dev() and
 usb_deregister_dev() must be called if you want to use the USB major number.

---
 drivers/usb/core/Makefile |   6 +-
 drivers/usb/core/file.c   | 182 ++++++++++++++++++++++++++++++++++++
 drivers/usb/core/inode.c  |  19 ----
 drivers/usb/core/usb.c    | 233 ++++------------------------------------------
 include/linux/usb.h       |  23 +----
 5 files changed, 208 insertions(+), 255 deletions(-)
 create mode 100644 drivers/usb/core/file.c

(limited to 'include/linux')

diff --git a/drivers/usb/core/Makefile b/drivers/usb/core/Makefile
index 506dfecc19d2..11a56285c9ee 100644
--- a/drivers/usb/core/Makefile
+++ b/drivers/usb/core/Makefile
@@ -2,17 +2,17 @@
 # Makefile for USB Core files and filesystem
 #
 
-export-objs	:= usb.o hcd.o hcd-pci.o urb.o message.o config.o
+export-objs	:= usb.o hcd.o hcd-pci.o urb.o message.o config.o file.o
 
 usbcore-objs	:= usb.o usb-debug.o hub.o hcd.o urb.o message.o \
-			config.o
+			config.o file.o
 
 ifeq ($(CONFIG_PCI),y)
 	usbcore-objs	+= hcd-pci.o
 endif
 
 ifeq ($(CONFIG_USB_DEVICEFS),y)
-	usbcore-objs	+= devio.o inode.o drivers.o devices.o
+	usbcore-objs	+= devio.o inode.o devices.o
 endif
 
 obj-$(CONFIG_USB)	+= usbcore.o
diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c
new file mode 100644
index 000000000000..eba43f1d84e5
--- /dev/null
+++ b/drivers/usb/core/file.c
@@ -0,0 +1,182 @@
+/*
+ * drivers/usb/core/file.c
+ *
+ * (C) Copyright Linus Torvalds 1999
+ * (C) Copyright Johannes Erdfelt 1999-2001
+ * (C) Copyright Andreas Gal 1999
+ * (C) Copyright Gregory P. Smith 1999
+ * (C) Copyright Deti Fliegl 1999 (new USB architecture)
+ * (C) Copyright Randy Dunlap 2000
+ * (C) Copyright David Brownell 2000-2001 (kernel hotplug, usb_device_id,
+ 	more docs, etc)
+ * (C) Copyright Yggdrasil Computing, Inc. 2000
+ *     (usb_device_id matching changes by Adam J. Richter)
+ * (C) Copyright Greg Kroah-Hartman 2002
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+
+#ifdef CONFIG_USB_DEBUG
+	#define DEBUG
+#else
+	#undef DEBUG
+#endif
+#include <linux/usb.h>
+
+devfs_handle_t usb_devfs_handle;	/* /dev/usb dir. */
+EXPORT_SYMBOL(usb_devfs_handle);
+
+#define MAX_USB_MINORS	256
+static struct file_operations *usb_minors[MAX_USB_MINORS];
+static spinlock_t minor_lock = SPIN_LOCK_UNLOCKED;
+/* open() for the shared USB major: hand the file over to the fops registered for its minor. */
+static int usb_open(struct inode * inode, struct file * file)
+{
+	int minor = minor(inode->i_rdev);	/* index into usb_minors[] */
+	struct file_operations *c;
+	int err = -ENODEV;	/* result unless the driver's own open() runs */
+	struct file_operations *old_fops, *new_fops = NULL;
+
+	spin_lock (&minor_lock);
+	c = usb_minors[minor];	/* fops registered for this minor, or NULL */
+	spin_unlock (&minor_lock);
+
+	if (!c || !(new_fops = fops_get(c)))
+		return err;
+	old_fops = file->f_op;
+	file->f_op = new_fops;	/* from here on the file uses the driver's fops */
+	/* Curiouser and curiouser... NULL ->open() as "no device" ? */
+	if (file->f_op->open)
+		err = file->f_op->open(inode,file);
+	if (err) {	/* open() failed or is missing: put the previous fops back */
+		fops_put(file->f_op);
+		file->f_op = fops_get(old_fops);
+	}
+	fops_put(old_fops);
+	return err;
+}
+
+static struct file_operations usb_fops = {
+	owner:		THIS_MODULE,
+	open:		usb_open,	/* only open() is set; usb_open() swaps in the per-minor fops */
+};
+/* Register usb_fops for the USB character major and create the "usb" devfs directory. */
+int usb_major_init(void)
+{
+	if (devfs_register_chrdev(USB_MAJOR, "usb", &usb_fops)) {
+		err("unable to get major %d for usb devices", USB_MAJOR);
+		return -EBUSY;	/* reported for any registration failure */
+	}
+
+	usb_devfs_handle = devfs_mk_dir(NULL, "usb", NULL);	/* result is not checked here */
+
+	return 0;
+}
+/* Undo usb_major_init(): remove the devfs directory handle, then release the USB major. */
+void usb_major_cleanup(void)
+{
+	devfs_unregister(usb_devfs_handle);
+	devfs_unregister_chrdev(USB_MAJOR, "usb");
+}
+
+/**
+ * usb_register_dev - register a USB device, and ask for a minor number
+ * @fops: the file operations for this USB device
+ * @minor: the requested starting minor for this device.
+ * @num_minors: number of minor numbers requested for this device
+ * @start_minor: place to put the new starting minor number
+ *
+ * This should be called by all USB drivers that use the USB major number.
+ * If CONFIG_USB_DYNAMIC_MINORS is enabled, the minor number will be
+ * dynamically allocated out of the list of available ones.  If it is not
+ * enabled, the minor number will be based on the next available free minor,
+ * starting at the requested @minor.
+ *
+ * usb_deregister_dev() must be called when the driver is done with
+ * the minor numbers given out by this function.
+ *
+ * Returns -EINVAL if @fops is NULL, @minor is negative, @num_minors is less
+ * than one, or no free run of @num_minors minors fits below MAX_USB_MINORS.
+ * Returns 0 on success, along with the first minor of the run in @start_minor.
+ */
+int usb_register_dev (struct file_operations *fops, int minor, int num_minors, int *start_minor)
+{
+	int i;
+	int j;
+	int good_spot;
+	int retval = -EINVAL;
+
+#ifdef CONFIG_USB_DYNAMIC_MINORS
+	/*
+	 * We don't care what the device tries to start at, we want to start
+	 * at zero to pack the devices into the smallest available space with
+	 * no holes in the minor range.
+	 */
+	minor = 0;
+#endif
+
+	dbg ("asking for %d minors, starting at %d", num_minors, minor);
+
+	/* minor_lock is not taken yet, so do not leave through exit: */
+	if (fops == NULL || minor < 0 || num_minors < 1)
+		return retval;
+
+	*start_minor = 0;
+	spin_lock (&minor_lock);
+	/* only try starts whose whole run of num_minors slots fits the table */
+	for (i = minor; i <= MAX_USB_MINORS - num_minors; ++i) {
+		if (usb_minors[i])
+			continue;
+		good_spot = 1;
+		for (j = 1; j <= num_minors-1; ++j)
+			if (usb_minors[i+j]) {
+				good_spot = 0;
+				break;
+			}
+		if (good_spot == 0)
+			continue;
+
+		*start_minor = i;
+		dbg("found a minor chunk free, starting at %d", i);
+		for (i = *start_minor; i < (*start_minor + num_minors); ++i)
+			usb_minors[i] = fops;
+		retval = 0;
+		goto exit;
+	}
+exit:
+	spin_unlock (&minor_lock);
+	return retval;
+}
+EXPORT_SYMBOL(usb_register_dev);
+
+/**
+ * usb_deregister_dev - release minors handed out by usb_register_dev()
+ * @num_minors: number of minor numbers to put back.
+ * @start_minor: the starting minor number
+ *
+ * Used in conjunction with usb_register_dev().  This function is called
+ * when the USB driver is finished with the minor numbers gotten from a
+ * call to usb_register_dev() (usually when the device is disconnected
+ * from the system).  The range is not validated here, so pass back exactly
+ * the count and starting minor that usb_register_dev() gave out.
+ * This should be called by all drivers that use the USB major number.
+ */
+void usb_deregister_dev (int num_minors, int start_minor)
+{
+	int i;
+
+	dbg ("removing %d minors starting at %d", num_minors, start_minor);
+
+	spin_lock (&minor_lock);
+	for (i = start_minor; i < (start_minor + num_minors); ++i)
+		usb_minors[i] = NULL;	/* NULL marks the slot as free for usb_register_dev() */
+	spin_unlock (&minor_lock);
+}
+EXPORT_SYMBOL(usb_deregister_dev);
+
+
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 8d31640ff966..c637805a4705 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -48,7 +48,6 @@ static spinlock_t mount_lock = SPIN_LOCK_UNLOCKED;
 static int mount_count;	/* = 0 */
 
 static struct dentry *devices_dentry;
-static struct dentry *drivers_dentry;
 static int num_buses;	/* = 0 */
 
 static uid_t devuid;	/* = 0 */
@@ -548,16 +547,6 @@ static int create_special_files (void)
 		return -ENODEV;
 	}
 
-	drivers_dentry = fs_create_file ("drivers",
-					 listmode | S_IFREG,
-					 NULL, NULL,
-					 &usbdevfs_drivers_fops,
-					 listuid, listgid);
-	if (drivers_dentry == NULL) {
-		err ("Unable to create drivers usbfs file");
-		return -ENODEV;
-	}
-
 	return 0;
 }
 
@@ -565,10 +554,7 @@ static void remove_special_files (void)
 {
 	if (devices_dentry)
 		fs_remove_file (devices_dentry);
-	if (drivers_dentry)
-		fs_remove_file (drivers_dentry);
 	devices_dentry = NULL;
-	drivers_dentry = NULL;
 	remove_mount();
 }
 
@@ -581,11 +567,6 @@ void usbfs_update_special (void)
 		if (inode)
 			inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	}
-	if (drivers_dentry) {
-		inode = devices_dentry->d_inode;
-		if (inode)
-			inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	}
 }
 
 void usbfs_add_bus(struct usb_bus *bus)
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 4842fbf06737..10591d54aebb 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -30,7 +30,6 @@
 #include <linux/interrupt.h>  /* for in_interrupt() */
 #include <linux/kmod.h>
 #include <linux/init.h>
-#include <linux/devfs_fs_kernel.h>
 #include <linux/spinlock.h>
 #include <linux/errno.h>
 
@@ -45,6 +44,8 @@
 
 extern int  usb_hub_init(void);
 extern void usb_hub_cleanup(void);
+extern int usb_major_init(void);
+extern void usb_major_cleanup(void);
 
 /*
  * Prototypes for the device driver probing/loading functions
@@ -58,75 +59,23 @@ static void usb_check_support(struct usb_device *);
  */
 LIST_HEAD(usb_driver_list);
 
-devfs_handle_t usb_devfs_handle;	/* /dev/usb dir. */
-
-#define MAX_USB_MINORS	256
-static struct usb_driver *usb_minors[MAX_USB_MINORS];
-static spinlock_t minor_lock = SPIN_LOCK_UNLOCKED;
-
-static int usb_register_minors (struct usb_driver *driver, int num_minors, int start_minor)
-{
-	int i;
-
-	dbg("registering %d minors, starting at %d", num_minors, start_minor);
-
-	if (start_minor + num_minors >= MAX_USB_MINORS)
-		return -EINVAL;
-
-	spin_lock (&minor_lock);
-	for (i = start_minor; i < (start_minor + num_minors); ++i)
-		if (usb_minors[i]) {
-			spin_unlock (&minor_lock);
-			err("minor %d is already in use, error registering %s driver",
-			    i, driver->name);
-			return -EINVAL;
-		}
-		
-	for (i = start_minor; i < (start_minor + num_minors); ++i)
-		usb_minors[i] = driver;
-
-	spin_unlock (&minor_lock);
-	return 0;
-}
-
-static void usb_deregister_minors (struct usb_driver *driver, int num_minors, int start_minor)
-{
-	int i;
-
-	dbg ("%s is removing %d minors starting at %d", driver->name,
-	     num_minors, start_minor);
-
-	spin_lock (&minor_lock);
-	for (i = start_minor; i < (start_minor + num_minors); ++i)
-		usb_minors[i] = NULL;
-	spin_unlock (&minor_lock);
-}
 
 /**
- *	usb_register - register a USB driver
- *	@new_driver: USB operations for the driver
+ * usb_register - register a USB driver
+ * @new_driver: USB operations for the driver
  *
- *	Registers a USB driver with the USB core.  The list of unattached
- *	interfaces will be rescanned whenever a new driver is added, allowing
- *	the new driver to attach to any recognized devices.
- *	Returns a negative error code on failure and 0 on success.
+ * Registers a USB driver with the USB core.  The list of unattached
+ * interfaces will be rescanned whenever a new driver is added, allowing
+ * the new driver to attach to any recognized devices.
+ * Returns a negative error code on failure and 0 on success.
+ * 
+ * NOTE: if you want your driver to use the USB major number, you must call
+ * usb_register_dev() to enable that functionality.  This function no longer
+ * takes care of that.
  */
 int usb_register(struct usb_driver *new_driver)
 {
 	int retval = 0;
-	
-	if ((new_driver->fops) && (new_driver->num_minors == 0)) {
-		err ("%s driver must specify num_minors", new_driver->name);
-		return -EINVAL;
-	}
-
-#ifndef CONFIG_USB_DYNAMIC_MINORS
-	if (new_driver->fops != NULL) {
-		retval = usb_register_minors (new_driver, new_driver->num_minors, new_driver->minor);
-		if (retval)
-			return retval;
-	}
-#endif
 
 	info("registered new driver %s", new_driver->name);
 
@@ -143,92 +92,6 @@ int usb_register(struct usb_driver *new_driver)
 }
 
 
-/**
- * usb_register_dev - register a USB device, and ask for a minor number
- * @new_driver: USB operations for the driver
- * @num_minors: number of minor numbers requested for this device
- * @start_minor: place to put the new starting minor number
- *
- * Used to ask the USB core for a new minor number for a device that has
- * just showed up.  This is used to dynamically allocate minor numbers
- * from the pool of USB reserved minor numbers.
- *
- * This should be called by all drivers that use the USB major number.
- * This only returns a good value of CONFIG_USB_DYNAMIC_MINORS is
- * selected by the user.
- *
- * usb_deregister_dev() should be called when the driver is done with
- * the minor numbers given out by this function.
- *
- * Returns -ENODEV if CONFIG_USB_DYNAMIC_MINORS is not enabled in this
- * kernel, -EINVAL if something bad happens with trying to register a
- * device, and 0 on success, alone with a value that the driver should
- * use in start_minor.
- */
-#ifdef CONFIG_USB_DYNAMIC_MINORS
-int usb_register_dev (struct usb_driver *new_driver, int num_minors, int *start_minor)
-{
-	int i;
-	int j;
-	int good_spot;
-	int retval = -EINVAL;
-
-	dbg ("%s is asking for %d minors", new_driver->name, num_minors);
-
-	if (new_driver->fops == NULL)
-		goto exit;
-
-	*start_minor = 0; 
-	spin_lock (&minor_lock);
-	for (i = 0; i < MAX_USB_MINORS; ++i) {
-		if (usb_minors[i])
-			continue;
-
-		good_spot = 1;
-		for (j = 1; j <= num_minors-1; ++j)
-			if (usb_minors[i+j]) {
-				good_spot = 0;
-				break;
-			}
-		if (good_spot == 0)
-			continue;
-
-		*start_minor = i;
-		spin_unlock (&minor_lock);
-		retval = usb_register_minors (new_driver, num_minors, *start_minor);
-		if (retval) {
-			/* someone snuck in here, so let's start looking all over again */
-			spin_lock (&minor_lock);
-			i = 0;
-			continue;
-		}
-		goto exit;
-	}
-	spin_unlock (&minor_lock);
-exit:
-	return retval;
-}
-
-/**
- * usb_deregister_dev - deregister a USB device's dynamic minor.
- * @driver: USB operations for the driver
- * @num_minors: number of minor numbers to put back.
- * @start_minor: the starting minor number
- *
- * Used in conjunction with usb_register_dev().  This function is called
- * when the USB driver is finished with the minor numbers gotten from a
- * call to usb_register_dev() (usually when the device is disconnected
- * from the system.)
- * 
- * This should be called by all drivers that use the USB major number.
- */
-void usb_deregister_dev (struct usb_driver *driver, int num_minors, int start_minor)
-{
-	usb_deregister_minors (driver, num_minors, start_minor);
-}
-#endif	/* CONFIG_USB_DYNAMIC_MINORS */
-
-
 /**
  *	usb_scan_devices - scans all unclaimed USB interfaces
  *	Context: !in_interrupt ()
@@ -298,11 +161,15 @@ static void usb_drivers_purge(struct usb_driver *driver,struct usb_device *dev)
 }
 
 /**
- *	usb_deregister - unregister a USB driver
- *	@driver: USB operations of the driver to unregister
- *	Context: !in_interrupt ()
+ * usb_deregister - unregister a USB driver
+ * @driver: USB operations of the driver to unregister
+ * Context: !in_interrupt ()
  *
- *	Unlinks the specified driver from the internal USB driver list.
+ * Unlinks the specified driver from the internal USB driver list.
+ * 
+ * NOTE: If you called usb_register_dev(), you still need to call
+ * usb_deregister_dev() to clean up your driver's allocated minor numbers;
+ * this call will no longer do it for you.
  */
 void usb_deregister(struct usb_driver *driver)
 {
@@ -310,11 +177,6 @@ void usb_deregister(struct usb_driver *driver)
 
 	info("deregistering driver %s", driver->name);
 
-#ifndef CONFIG_USB_DYNAMIC_MINORS
-	if (driver->fops != NULL)
-		usb_deregister_minors (driver, driver->num_minors, driver->minor);
-#endif
-
 	/*
 	 * first we remove the driver, to be sure it doesn't get used by
 	 * another thread while we are stepping through removing entries
@@ -1357,55 +1219,6 @@ int usb_new_device(struct usb_device *dev)
 	return 0;
 }
 
-static int usb_open(struct inode * inode, struct file * file)
-{
-	int minor = minor(inode->i_rdev);
-	struct usb_driver *c;
-	int err = -ENODEV;
-	struct file_operations *old_fops, *new_fops = NULL;
-
-	spin_lock (&minor_lock);
-	c = usb_minors[minor];
-	spin_unlock (&minor_lock);
-
-	if (!c || !(new_fops = fops_get(c->fops)))
-		return err;
-	old_fops = file->f_op;
-	file->f_op = new_fops;
-	/* Curiouser and curiouser... NULL ->open() as "no device" ? */
-	if (file->f_op->open)
-		err = file->f_op->open(inode,file);
-	if (err) {
-		fops_put(file->f_op);
-		file->f_op = fops_get(old_fops);
-	}
-	fops_put(old_fops);
-	return err;
-}
-
-static struct file_operations usb_fops = {
-	owner:		THIS_MODULE,
-	open:		usb_open,
-};
-
-int usb_major_init(void)
-{
-	if (devfs_register_chrdev(USB_MAJOR, "usb", &usb_fops)) {
-		err("unable to get major %d for usb devices", USB_MAJOR);
-		return -EBUSY;
-	}
-
-	usb_devfs_handle = devfs_mk_dir(NULL, "usb", NULL);
-
-	return 0;
-}
-
-void usb_major_cleanup(void)
-{
-	devfs_unregister(usb_devfs_handle);
-	devfs_unregister_chrdev(USB_MAJOR, "usb");
-}
-
 
 #ifdef CONFIG_PROC_FS
 struct list_head *usb_driver_get_list(void)
@@ -1463,11 +1276,6 @@ EXPORT_SYMBOL(usb_register);
 EXPORT_SYMBOL(usb_deregister);
 EXPORT_SYMBOL(usb_scan_devices);
 
-#ifdef CONFIG_USB_DYNAMIC_MINORS
-EXPORT_SYMBOL(usb_register_dev);
-EXPORT_SYMBOL(usb_deregister_dev);
-#endif
-
 EXPORT_SYMBOL(usb_alloc_dev);
 EXPORT_SYMBOL(usb_free_dev);
 EXPORT_SYMBOL(usb_get_dev);
@@ -1489,5 +1297,4 @@ EXPORT_SYMBOL(__usb_get_extra_descriptor);
 
 EXPORT_SYMBOL(usb_get_current_frame_number);
 
-EXPORT_SYMBOL(usb_devfs_handle);
 MODULE_LICENSE("GPL");
diff --git a/include/linux/usb.h b/include/linux/usb.h
index d31eb7e942b3..b1c39c99e75f 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -104,6 +104,7 @@
 #include <linux/interrupt.h>	/* for in_interrupt() */
 #include <linux/list.h>		/* for struct list_head */
 #include <linux/device.h>	/* for struct device */
+#include <linux/fs.h>		/* for struct file_operations */
 
 
 static __inline__ void wait_ms(unsigned int ms)
@@ -648,14 +649,6 @@ struct usb_device_id {
  *	because its device has been (or is being) disconnected.  The
  *	handle passed is what was returned by probe(), or was provided
  *	to usb_driver_claim_interface().
- * @fops: USB drivers can reuse some character device framework in
- *	the USB subsystem by providing a file operations vector and
- *	a minor number.
- * @minor: Used with fops to simplify creating USB character devices.
- *	Such drivers have sixteen character devices, using the USB
- *	major number and starting with this minor number.
- * @num_minors: Used with minor to specify how many minors are used by
- *	this driver.
  * @ioctl: Used for drivers that want to talk to userspace through
  *	the "usbfs" filesystem.  This lets devices provide ways to
  *	expose information to user space regardless of where they
@@ -694,11 +687,6 @@ struct usb_driver {
 	    );
 
 	struct list_head driver_list;
-
-	struct file_operations *fops;
-	int minor;
-	int num_minors;
-
 	struct semaphore serialize;
 
 	/* ioctl -- userspace apps can talk to drivers through usbfs */
@@ -722,13 +710,8 @@ extern struct bus_type usb_bus_type;
 extern int usb_register(struct usb_driver *);
 extern void usb_deregister(struct usb_driver *);
 
-#ifndef CONFIG_USB_DYNAMIC_MINORS
-static inline int usb_register_dev(struct usb_driver *new_driver, int num_minors, int *start_minor) { return -ENODEV; }
-static inline void usb_deregister_dev(struct usb_driver *driver, int num_minors, int start_minor) {}
-#else
-extern int usb_register_dev(struct usb_driver *new_driver, int num_minors, int *start_minor);
-extern void usb_deregister_dev(struct usb_driver *driver, int num_minors, int start_minor);
-#endif
+extern int usb_register_dev(struct file_operations *fops, int minor, int num_minors, int *start_minor);
+extern void usb_deregister_dev(int num_minors, int start_minor);
 
 /* -------------------------------------------------------------------------- */
 
-- 
cgit v1.2.3