summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Morton <akpm@digeo.com>2002-10-04 20:35:54 -0700
committerRussell King <rmk@flint.arm.linux.org.uk>2002-10-04 20:35:54 -0700
commit3669e82478d2eebd95bf1e23aad28eb11a0262fc (patch)
treec703166699a14fcbf7608f360df67a8050d4f4bb
parenta27efcaff9ffd5ad05f4e111751da41a8820f7ab (diff)
[PATCH] stricter dirty memory clamping
The ratelimiting logic in balance_dirty_pages_ratelimited() is designed to prevent excessive calls to the expensive get_page_state(): On a big machine we only check to see if we're over dirty memory limits once per 1024 dirtyings per cpu. This works OK normally, but it has the effect of allowing each process to go 1024 pages over the dirty limit before it gets throttled. So if someone runs 16000 tiobench threads, they can go 16G over the dirty memory threshold and die the death of buffer_head consumption, because page dirtiness pins the page's buffer_heads, defeating the special buffer_head reclaim logic. I'd left this overshoot artifact in place because it provides a degree of adaptivity - if someone is running hundreds of dirtying processes (dbench!) then they do want to overshoot the dirty memory limit. But it's hard to balance, and is really not worth the futzing around. So change the logic to only perform the get_page_state() call rate limiting if we're known to be under the dirty memory threshold.
-rw-r--r--mm/page-writeback.c55
1 files changed, 28 insertions, 27 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b67090c6f678..b8f9c354cffb 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -41,10 +41,8 @@
*/
static long ratelimit_pages = 32;
-/*
- * The total number of pages in the machine.
- */
-static long total_pages;
+static long total_pages; /* The total number of pages in the machine. */
+static int dirty_exceeded; /* Dirty mem may be over limit */
/*
* When balance_dirty_pages decides that the caller needs to perform some
@@ -60,16 +58,12 @@ static inline long sync_writeback_pages(void)
/* The following parameters are exported via /proc/sys/vm */
/*
- * Dirty memory thresholds, in percentages
- */
-
-/*
- * Start background writeback (via pdflush) at this level
+ * Start background writeback (via pdflush) at this percentage
*/
int dirty_background_ratio = 10;
/*
- * The generator of dirty data starts async writeback at this level
+ * The generator of dirty data starts async writeback at this percentage
*/
int dirty_async_ratio = 40;
@@ -80,7 +74,7 @@ int dirty_async_ratio = 40;
int dirty_writeback_centisecs = 5 * 100;
/*
- * The longest amount of time for which data is allowed to remain dirty
+ * The longest number of centiseconds for which data is allowed to remain dirty
*/
int dirty_expire_centisecs = 30 * 100;
@@ -90,22 +84,17 @@ int dirty_expire_centisecs = 30 * 100;
static void background_writeout(unsigned long _min_pages);
/*
- * balance_dirty_pages() must be called by processes which are
- * generating dirty data. It looks at the number of dirty pages
- * in the machine and either:
- *
- * - Starts background writeback or
- * - Causes the caller to perform async writeback or
- * - Causes the caller to perform synchronous writeback, then
- * tells a pdflush thread to perform more writeback or
- * - Does nothing at all.
- *
- * balance_dirty_pages() can sleep.
+ * balance_dirty_pages() must be called by processes which are generating dirty
+ * data. It looks at the number of dirty pages in the machine and will force
+ * the caller to perform writeback if the system is over `async_thresh'.
+ * If we're over `background_thresh' then pdflush is woken to perform some
+ * writeout.
*/
void balance_dirty_pages(struct address_space *mapping)
{
struct page_state ps;
- long background_thresh, async_thresh;
+ long background_thresh;
+ long async_thresh;
unsigned long dirty_and_writeback;
struct backing_dev_info *bdi;
@@ -123,9 +112,13 @@ void balance_dirty_pages(struct address_space *mapping)
.older_than_this = NULL,
.nr_to_write = sync_writeback_pages(),
};
-
+ if (!dirty_exceeded)
+ dirty_exceeded = 1;
writeback_inodes(&wbc);
get_page_state(&ps);
+ } else {
+ if (dirty_exceeded)
+ dirty_exceeded = 0;
}
if (!writeback_in_progress(bdi) && ps.nr_dirty > background_thresh)
@@ -141,17 +134,25 @@ EXPORT_SYMBOL_GPL(balance_dirty_pages);
* which was newly dirtied. The function will periodically check the system's
* dirty state and will initiate writeback if needed.
*
- * balance_dirty_pages_ratelimited() may sleep.
+ * On really big machines, get_page_state is expensive, so try to avoid calling
+ * it too often (ratelimiting). But once we're over the dirty memory limit we
+ * decrease the ratelimiting by a lot, to prevent individual processes from
+ * overshooting the limit by (ratelimit_pages) each.
*/
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
static struct rate_limit_struct {
int count;
- } ____cacheline_aligned ratelimits[NR_CPUS];
+ } ____cacheline_aligned_in_smp ratelimits[NR_CPUS];
int cpu;
+ long ratelimit;
+
+ ratelimit = ratelimit_pages;
+ if (dirty_exceeded)
+ ratelimit = 8;
cpu = get_cpu();
- if (ratelimits[cpu].count++ >= ratelimit_pages) {
+ if (ratelimits[cpu].count++ >= ratelimit) {
ratelimits[cpu].count = 0;
put_cpu();
balance_dirty_pages(mapping);