author     Andrew Morton <akpm@digeo.com>  2002-11-15 18:53:12 -0800
committer  Linus Torvalds <torvalds@home.transmeta.com>  2002-11-15 18:53:12 -0800
commit     9c716856346cd1a701ac38e07aafce8d60e1fc0e (patch)
tree       b53b8fbb59a65f0c79c2fa2788a4b40d34ff421d
parent     b084fe4bb9f8ed890a758ca37faba8aec8bce7ee (diff)
[PATCH] better inode reclaim balancing
The inode reclaim is too aggressive at present - it is causing the shootdown
of lots of recently-used pagecache.

Simple testcase: run a huge `dd' while running a concurrent
`watch -n1 cat /proc/meminfo'.  The program text for `cat' gets loaded from
disk once per second.

This is in fact because the dentry_unused reclaim is too aggressive.  (The
general approach to inode reclaim is that it _not_ happen at the inode
level.  All the aging and lru activity happens at the dcache level.)

The problem is partly due to a bug: shrink_dcache_memory() is returning the
*total* number of dentries to the VM, rather than the number of unused
dentries.

This patch fixes that, and goes a little further.  We do want to keep some
unused dentries around.  Reclaiming the last few thousand dentries is pretty
pointless, and will allow reclaim of the last few thousand inodes and their
attached pagecache.  So the algorithm I have used is to not allow the number
of unused dentries to fall below the number of used ones.  This keeps a
reasonable number of dentries in cache while providing a level of scaling to
the system size and the current system activity.

(Magic number alert: why not pin nr_unused to seven times nr_used, rather
than one times??)

shrink_dcache_memory() has been changed to tell the VM that the number of
shrinkable dentries is:

	zero			if (nr_unused < nr_used)
	(nr_unused - nr_used)	otherwise

so when there is memory pressure the VM will prune the unused dentry cache
down to the size of the used dentry cache, but not below that.

The patch also arranges (awkwardly) for all modifications of
dentry_stat.nr_dentry to occur inside dcache_lock - it was racy.
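To make the balancing rule concrete, here is a minimal userspace sketch of
the computation the patch adds to shrink_dcache_memory().  The shrinkable()
helper and the counter values are illustrative stand-ins, not kernel code;
dentry_stat.nr_dentry and dentry_stat.nr_unused are the real analogues of
the two counts.

	/*
	 * Sketch of the reclaim-balancing rule.  shrinkable() is a
	 * hypothetical stand-in for the logic in shrink_dcache_memory();
	 * the input values below are made up for illustration.
	 */
	#include <stdio.h>

	static int shrinkable(int nr_dentry, int nr_unused, int unused_ratio)
	{
		int nr_used = nr_dentry - nr_unused;

		if (nr_unused < nr_used * unused_ratio)
			return 0;	/* tell the VM there is nothing to reap */
		return nr_unused - nr_used * unused_ratio;
	}

	int main(void)
	{
		/* 10000 dentries, 3000 unused: unused < used, so report 0 */
		printf("%d\n", shrinkable(10000, 3000, 1));	/* prints 0 */
		/* 10000 dentries, 8000 unused: report 8000 - 2000 = 6000 */
		printf("%d\n", shrinkable(10000, 8000, 1));	/* prints 6000 */
		return 0;
	}

With unused_ratio = 1 this pins nr_unused at no less than nr_used, which is
exactly the behaviour the commit message describes.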
-rw-r--r--  fs/dcache.c | 46
1 file changed, 36 insertions(+), 10 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 38ea3d78ab6a..90c54af6f9ff 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -55,15 +55,17 @@ struct dentry_stat_t dentry_stat = {
 	.age_limit = 45,
 };
 
-/* no dcache_lock, please */
-static inline void d_free(struct dentry *dentry)
+/*
+ * no dcache_lock, please.  The caller must decrement dentry_stat.nr_dentry
+ * inside dcache_lock.
+ */
+static void d_free(struct dentry *dentry)
 {
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
 	if (dname_external(dentry))
 		kfree(dentry->d_name.name);
 	kmem_cache_free(dentry_cache, dentry);
-	dentry_stat.nr_dentry--;
 }
 
 /*
@@ -149,6 +151,7 @@ unhash_it:
 kill_it: {
 		struct dentry *parent;
 		list_del(&dentry->d_child);
+		dentry_stat.nr_dentry--;	/* For d_free, below */
 		/* drops the lock, at that point nobody can reach this dentry */
 		dentry_iput(dentry);
 		parent = dentry->d_parent;
@@ -307,6 +310,7 @@ static inline void prune_one_dentry(struct dentry * dentry)
 
 	list_del_init(&dentry->d_hash);
 	list_del(&dentry->d_child);
+	dentry_stat.nr_dentry--;	/* For d_free, below */
 	dentry_iput(dentry);
 	parent = dentry->d_parent;
 	d_free(dentry);
@@ -569,11 +573,25 @@ void shrink_dcache_anon(struct list_head *head)
 }
 
 /*
- * This is called from kswapd when we think we need some
- * more memory.
+ * This is called from kswapd when we think we need some more memory.
+ *
+ * We don't want the VM to steal _all_ unused dcache.  Because that leads to
+ * the VM stealing all unused inodes, which shoots down recently-used
+ * pagecache.  So what we do is to tell fibs to the VM about how many reapable
+ * objects there are in this cache.  If the number of unused dentries is
+ * less than half of the total dentry count then return zero.  The net effect
+ * is that the number of unused dentries will be, at a minimum, equal to the
+ * number of used ones.
+ *
+ * If unused_ratio is set to 5, the number of unused dentries will not fall
+ * below 5x the number of used ones.
+ */
 static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
 {
+	int nr_used;
+	int nr_unused;
+	const int unused_ratio = 1;
+
 	if (nr) {
 		/*
 		 * Nasty deadlock avoidance.
@@ -589,7 +607,11 @@ static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
 		if (gfp_mask & __GFP_FS)
 			prune_dcache(nr);
 	}
-	return dentry_stat.nr_dentry;
+	nr_unused = dentry_stat.nr_unused;
+	nr_used = dentry_stat.nr_dentry - nr_unused;
+	if (nr_unused < nr_used * unused_ratio)
+		return 0;
+	return nr_unused - nr_used * unused_ratio;
 }
 
 #define NAME_ALLOC_LEN(len)	((len+16) & ~15)
@@ -642,16 +664,20 @@ struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
 	INIT_LIST_HEAD(&dentry->d_alias);
+
 	if (parent) {
 		dentry->d_parent = dget(parent);
 		dentry->d_sb = parent->d_sb;
-		spin_lock(&dcache_lock);
-		list_add(&dentry->d_child, &parent->d_subdirs);
-		spin_unlock(&dcache_lock);
-	} else
+	} else {
 		INIT_LIST_HEAD(&dentry->d_child);
+	}
+
+	spin_lock(&dcache_lock);
+	if (parent)
+		list_add(&dentry->d_child, &parent->d_subdirs);
 	dentry_stat.nr_dentry++;
+	spin_unlock(&dcache_lock);
+
 	return dentry;
 }
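The last hunk above is the locking fix: every modification of
dentry_stat.nr_dentry now happens inside dcache_lock.  As a rough userspace
analogue of that discipline, the sketch below guards a shared counter with a
pthread mutex standing in for dcache_lock; the names and structure are
hypothetical, for illustration only.

	/*
	 * Userspace analogue of the counter-locking rule: all updates of a
	 * shared statistics counter go through one lock.  pthread_mutex_t
	 * is a stand-in for dcache_lock; nr_objects plays the role of
	 * dentry_stat.nr_dentry.
	 */
	#include <pthread.h>

	static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
	static long nr_objects;

	static void object_created(void)
	{
		pthread_mutex_lock(&cache_lock);
		nr_objects++;		/* as d_alloc() does after the patch */
		pthread_mutex_unlock(&cache_lock);
	}

	static void object_destroyed(void)
	{
		pthread_mutex_lock(&cache_lock);
		nr_objects--;		/* as d_free()'s callers now do */
		pthread_mutex_unlock(&cache_lock);
		/* the actual freeing can then happen outside the lock,
		 * which is why the decrement moved out of d_free() */
	}

Without the lock, two concurrent increment/decrement pairs can interleave
and lose an update, which is the race the commit message calls out.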