summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- fs/ext3/balloc.c 53
-rw-r--r-- fs/ext3/ialloc.c 127
-rw-r--r-- fs/ext3/super.c 47
-rw-r--r-- include/linux/ext3_fs_sb.h 15
4 files changed, 126 insertions, 116 deletions
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index c792e232a493..8ec3af3f6410 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -110,6 +110,7 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
struct super_block * sb;
struct ext3_group_desc * gdp;
struct ext3_super_block * es;
+ struct ext3_sb_info *sbi;
int err = 0, ret;
int dquot_freed_blocks = 0;
@@ -118,6 +119,7 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
printk ("ext3_free_blocks: nonexistent device");
return;
}
+ sbi = EXT3_SB(sb);
es = EXT3_SB(sb)->s_es;
if (block < le32_to_cpu(es->s_first_data_block) ||
block + count < block ||
@@ -242,11 +244,12 @@ do_more:
}
}
- spin_lock(bg_lock(sb, block_group));
+ spin_lock(sb_bgl_lock(sbi, block_group));
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
dquot_freed_blocks);
- spin_unlock(bg_lock(sb, block_group));
+ spin_unlock(sb_bgl_lock(sbi, block_group));
+ percpu_counter_mod(&sbi->s_freeblocks_counter, count);
/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -429,7 +432,7 @@ got:
have_access = 1;
}
- if (!claim_block(bg_lock(sb, group), goal, bitmap_bh)) {
+ if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) {
/*
* The block was allocated by another thread, or it was
* allocated and then freed by another thread
@@ -477,11 +480,11 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
int target_block; /* tmp */
int fatal = 0, err;
int performed_allocation = 0;
- int free;
- int use_reserve = 0;
+ int free_blocks, root_blocks;
struct super_block *sb;
struct ext3_group_desc *gdp;
struct ext3_super_block *es;
+ struct ext3_sb_info *sbi;
#ifdef EXT3FS_DEBUG
static int goal_hits = 0, goal_attempts = 0;
#endif
@@ -500,9 +503,19 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
return 0;
}
+ sbi = EXT3_SB(sb);
es = EXT3_SB(sb)->s_es;
ext3_debug("goal=%lu.\n", goal);
+ free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+ root_blocks = le32_to_cpu(es->s_r_blocks_count);
+ if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
+ sbi->s_resuid != current->fsuid &&
+ (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
+ *errp = -ENOSPC;
+ return 0;
+ }
+
/*
* First, test whether the goal block is free.
*/
@@ -515,9 +528,8 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
if (!gdp)
goto io_error;
- free = le16_to_cpu(gdp->bg_free_blocks_count);
- free -= EXT3_SB(sb)->s_bgi[group_no].bg_reserved;
- if (free > 0) {
+ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+ if (free_blocks > 0) {
ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
EXT3_BLOCKS_PER_GROUP(sb));
bitmap_bh = read_block_bitmap(sb, group_no);
@@ -535,7 +547,6 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
* Now search the rest of the groups. We assume that
* i and gdp correctly point to the last group visited.
*/
-repeat:
for (bgi = 0; bgi < EXT3_SB(sb)->s_groups_count; bgi++) {
group_no++;
if (group_no >= EXT3_SB(sb)->s_groups_count)
@@ -545,10 +556,8 @@ repeat:
*errp = -EIO;
goto out;
}
- free = le16_to_cpu(gdp->bg_free_blocks_count);
- if (!use_reserve)
- free -= EXT3_SB(sb)->s_bgi[group_no].bg_reserved;
- if (free <= 0)
+ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+ if (free_blocks <= 0)
continue;
brelse(bitmap_bh);
@@ -563,15 +572,6 @@ repeat:
goto allocated;
}
- if (!use_reserve &&
- (EXT3_SB(sb)->s_resuid == current->fsuid ||
- (EXT3_SB(sb)->s_resgid != 0 && in_group_p(EXT3_SB(sb)->s_resgid)) ||
- capable(CAP_SYS_RESOURCE))) {
- use_reserve = 1;
- group_no = 0;
- goto repeat;
- }
-
/* No space left on the device */
*errp = -ENOSPC;
goto out;
@@ -612,13 +612,13 @@ allocated:
}
}
#endif
- spin_lock(bg_lock(sb, group_no));
+ spin_lock(sb_bgl_lock(sbi, group_no));
if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data)
J_ASSERT_BH(bitmap_bh,
!ext3_test_bit(ret_block,
bh2jh(bitmap_bh)->b_committed_data));
ext3_debug("found bit %d\n", ret_block);
- spin_unlock(bg_lock(sb, group_no));
+ spin_unlock(sb_bgl_lock(sbi, group_no));
/* ret_block was blockgroup-relative. Now it becomes fs-relative */
ret_block = target_block;
@@ -639,10 +639,11 @@ allocated:
ext3_debug("allocating block %d. Goal hits %d of %d.\n",
ret_block, goal_hits, goal_attempts);
- spin_lock(bg_lock(sb, group_no));
+ spin_lock(sb_bgl_lock(sbi, group_no));
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
- spin_unlock(bg_lock(sb, group_no));
+ spin_unlock(sb_bgl_lock(sbi, group_no));
+ percpu_counter_mod(&sbi->s_freeblocks_counter, -1);
BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
err = ext3_journal_dirty_metadata(handle, gdp_bh);
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index ab12862053d7..b0c204a77252 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -97,6 +97,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
unsigned long bit;
struct ext3_group_desc * gdp;
struct ext3_super_block * es;
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
int fatal = 0, err;
if (atomic_read(&inode->i_count) > 1) {
@@ -161,13 +162,17 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
if (fatal) goto error_return;
if (gdp) {
- spin_lock(&EXT3_SB(sb)->s_bgi[block_group].bg_ialloc_lock);
+ spin_lock(sb_bgl_lock(sbi, block_group));
gdp->bg_free_inodes_count = cpu_to_le16(
le16_to_cpu(gdp->bg_free_inodes_count) + 1);
if (is_directory)
gdp->bg_used_dirs_count = cpu_to_le16(
le16_to_cpu(gdp->bg_used_dirs_count) - 1);
- spin_unlock(&EXT3_SB(sb)->s_bgi[block_group].bg_ialloc_lock);
+ spin_unlock(sb_bgl_lock(sbi, block_group));
+ percpu_counter_inc(&sbi->s_freeinodes_counter);
+ if (is_directory)
+ percpu_counter_dec(&sbi->s_dirs_counter);
+
}
BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, bh2);
@@ -196,11 +201,14 @@ error_return:
static int find_group_dir(struct super_block *sb, struct inode *parent)
{
int ngroups = EXT3_SB(sb)->s_groups_count;
- int avefreei = ext3_count_free_inodes(sb) / ngroups;
+ int freei, avefreei;
struct ext3_group_desc *desc, *best_desc = NULL;
struct buffer_head *bh;
int group, best_group = -1;
+ freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
+ avefreei = freei / ngroups;
+
for (group = 0; group < ngroups; group++) {
desc = ext3_get_group_desc (sb, group, &bh);
if (!desc || !desc->bg_free_inodes_count)
@@ -252,17 +260,20 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
struct ext3_super_block *es = sbi->s_es;
int ngroups = sbi->s_groups_count;
int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
- int freei = ext3_count_free_inodes(sb);
- int avefreei = freei / ngroups;
- int freeb = ext3_count_free_blocks(sb);
- int avefreeb = freeb / ngroups;
- int blocks_per_dir;
- int ndirs = ext3_count_dirs(sb);
+ int freei, avefreei;
+ int freeb, avefreeb;
+ int blocks_per_dir, ndirs;
int max_debt, max_dirs, min_blocks, min_inodes;
int group = -1, i;
struct ext3_group_desc *desc;
struct buffer_head *bh;
+ freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
+ avefreei = freei / ngroups;
+ freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+ avefreeb = freeb / ngroups;
+ ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
+
if ((parent == sb->s_root->d_inode) ||
(parent->i_flags & EXT3_TOPDIR_FL)) {
int best_ndir = inodes_per_group;
@@ -289,8 +300,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
goto fallback;
}
- blocks_per_dir = (le32_to_cpu(es->s_blocks_count) -
- le32_to_cpu(es->s_free_blocks_count)) / ndirs;
+ blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs;
max_dirs = ndirs / ngroups + inodes_per_group / 16;
min_inodes = avefreei - inodes_per_group / 4;
@@ -309,7 +319,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
desc = ext3_get_group_desc (sb, group, &bh);
if (!desc || !desc->bg_free_inodes_count)
continue;
- if (sbi->s_bgi[group].bg_debts >= max_debt)
+ if (sbi->s_debts[group] >= max_debt)
continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
continue;
@@ -416,13 +426,15 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *bh2;
int group;
- unsigned long ino;
+ unsigned long ino = 0;
struct inode * inode;
- struct ext3_group_desc * gdp;
+ struct ext3_group_desc * gdp = NULL;
struct ext3_super_block * es;
struct ext3_inode_info *ei;
+ struct ext3_sb_info *sbi;
int err = 0;
struct inode *ret;
+ int i;
/* Cannot create files in a deleted directory */
if (!dir || !dir->i_nlink)
@@ -435,7 +447,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
ei = EXT3_I(inode);
es = EXT3_SB(sb)->s_es;
-repeat:
+ sbi = EXT3_SB(sb);
if (S_ISDIR(mode)) {
if (test_opt (sb, OLDALLOC))
group = find_group_dir(sb, dir);
@@ -448,46 +460,52 @@ repeat:
if (group == -1)
goto out;
- err = -EIO;
- brelse(bitmap_bh);
- bitmap_bh = read_inode_bitmap(sb, group);
- if (!bitmap_bh)
- goto fail;
- gdp = ext3_get_group_desc (sb, group, &bh2);
-
- if ((ino = ext3_find_first_zero_bit((unsigned long *)bitmap_bh->b_data,
- EXT3_INODES_PER_GROUP(sb))) <
- EXT3_INODES_PER_GROUP(sb)) {
- BUFFER_TRACE(bitmap_bh, "get_write_access");
- err = ext3_journal_get_write_access(handle, bitmap_bh);
- if (err) goto fail;
-
- if (ext3_set_bit_atomic(sb_bgl_lock(sbi, group),
- ino, bitmap_bh->b_data))
- goto repeat;
- BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
- err = ext3_journal_dirty_metadata(handle, bitmap_bh);
- if (err) goto fail;
- } else {
- if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) {
- ext3_error (sb, "ext3_new_inode",
- "Free inodes count corrupted in group %d",
- group);
- /* Is it really ENOSPC? */
- err = -ENOSPC;
- if (sb->s_flags & MS_RDONLY)
+ for (i = 0; i < sbi->s_groups_count; i++) {
+ gdp = ext3_get_group_desc(sb, group, &bh2);
+
+ err = -EIO;
+ brelse(bitmap_bh);
+ bitmap_bh = read_inode_bitmap(sb, group);
+ if (!bitmap_bh)
+ goto fail;
+
+ ino = ext3_find_first_zero_bit((unsigned long *)
+ bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb));
+ if (ino < EXT3_INODES_PER_GROUP(sb)) {
+ BUFFER_TRACE(bitmap_bh, "get_write_access");
+ err = ext3_journal_get_write_access(handle, bitmap_bh);
+ if (err)
goto fail;
- BUFFER_TRACE(bh2, "get_write_access");
- err = ext3_journal_get_write_access(handle, bh2);
- if (err) goto fail;
- gdp->bg_free_inodes_count = 0;
- BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
- err = ext3_journal_dirty_metadata(handle, bh2);
- if (err) goto fail;
+ if (!ext3_set_bit_atomic(sb_bgl_lock(sbi, group),
+ ino, bitmap_bh->b_data)) {
+ /* we won it */
+ BUFFER_TRACE(bitmap_bh,
+ "call ext3_journal_dirty_metadata");
+ err = ext3_journal_dirty_metadata(handle,
+ bitmap_bh);
+ if (err)
+ goto fail;
+ goto got;
+ }
+ /* we lost it */
+ journal_release_buffer(handle, bitmap_bh);
}
- goto repeat;
+
+ /*
+ * This case is possible in a concurrent environment, though it
+ * is very rare. We cannot repeat the find_group_xxx() call
+ * because that would simply return the same blockgroup: the
+ * group descriptor metadata has not yet been updated.
+ * So we just go on to the next blockgroup.
+ */
+ if (++group == sbi->s_groups_count)
+ group = 0;
}
+ err = -ENOSPC;
+ goto out;
+
+got:
ino += group * EXT3_INODES_PER_GROUP(sb) + 1;
if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
ext3_error (sb, "ext3_new_inode",
@@ -500,18 +518,21 @@ repeat:
BUFFER_TRACE(bh2, "get_write_access");
err = ext3_journal_get_write_access(handle, bh2);
if (err) goto fail;
- spin_lock(&EXT3_SB(sb)->s_bgi[group].bg_ialloc_lock);
+ spin_lock(sb_bgl_lock(sbi, group));
gdp->bg_free_inodes_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
if (S_ISDIR(mode)) {
gdp->bg_used_dirs_count =
cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
}
- spin_unlock(&EXT3_SB(sb)->s_bgi[group].bg_ialloc_lock);
+ spin_unlock(sb_bgl_lock(sbi, group));
BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, bh2);
if (err) goto fail;
+ percpu_counter_dec(&sbi->s_freeinodes_counter);
+ if (S_ISDIR(mode))
+ percpu_counter_inc(&sbi->s_dirs_counter);
sb->s_dirt = 1;
inode->i_uid = current->fsuid;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index cc3dfd7e907c..3ae758ed667c 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -460,7 +460,7 @@ void ext3_put_super (struct super_block * sb)
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
- kfree(sbi->s_bgi);
+ kfree(sbi->s_debts);
brelse(sbi->s_sbh);
/* Debugging code just in case the in-memory inode orphan list
@@ -902,7 +902,6 @@ static int ext3_check_descriptors (struct super_block * sb)
unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block);
struct ext3_group_desc * gdp = NULL;
unsigned long total_free;
- unsigned int reserved = le32_to_cpu(sbi->s_es->s_r_blocks_count);
int desc_block = 0;
int i;
@@ -958,25 +957,6 @@ static int ext3_check_descriptors (struct super_block * sb)
EXT3_SB(sb)->s_es->s_free_blocks_count = cpu_to_le32(total_free);
}
- /* distribute reserved blocks over groups -bzzz */
- for(i = sbi->s_groups_count - 1; reserved && total_free && i >= 0; i--) {
- int free;
-
- gdp = ext3_get_group_desc (sb, i, NULL);
- if (!gdp) {
- ext3_error (sb, "ext3_check_descriptors",
- "cant get descriptor for group %d", i);
- return 0;
- }
-
- free = le16_to_cpu(gdp->bg_free_blocks_count);
- if (free > reserved)
- free = reserved;
- sbi->s_bgi[i].bg_reserved = free;
- reserved -= free;
- total_free -= free;
- }
-
total_free = ext3_count_free_inodes(sb);
if (total_free != le32_to_cpu(EXT3_SB(sb)->s_es->s_free_inodes_count)) {
printk("EXT3-fs: invalid s_free_inodes_count %u (real %lu)\n",
@@ -1346,17 +1326,19 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
printk (KERN_ERR "EXT3-fs: not enough memory\n");
goto failed_mount;
}
- sbi->s_bgi = kmalloc(sbi->s_groups_count * sizeof(struct ext3_bg_info),
+ sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(u8),
GFP_KERNEL);
- if (!sbi->s_bgi) {
+ if (!sbi->s_debts) {
printk("EXT3-fs: not enough memory to allocate s_bgi\n");
goto failed_mount2;
}
- memset(sbi->s_bgi, 0, sbi->s_groups_count * sizeof(struct ext3_bg_info));
- for (i = 0; i < sbi->s_groups_count; i++) {
- spin_lock_init(&sbi->s_bgi[i].bg_balloc_lock);
- spin_lock_init(&sbi->s_bgi[i].bg_ialloc_lock);
- }
+ memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(u8));
+
+ percpu_counter_init(&sbi->s_freeblocks_counter);
+ percpu_counter_init(&sbi->s_freeinodes_counter);
+ percpu_counter_init(&sbi->s_dirs_counter);
+ bgl_lock_init(&sbi->s_blockgroup_lock);
+
for (i = 0; i < db_count; i++) {
block = descriptor_loc(sb, logic_sb_block, i);
sbi->s_group_desc[i] = sb_bread(sb, block);
@@ -1469,12 +1451,19 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
"writeback");
+ percpu_counter_mod(&sbi->s_freeblocks_counter,
+ ext3_count_free_blocks(sb));
+ percpu_counter_mod(&sbi->s_freeinodes_counter,
+ ext3_count_free_inodes(sb));
+ percpu_counter_mod(&sbi->s_dirs_counter,
+ ext3_count_dirs(sb));
+
return 0;
failed_mount3:
journal_destroy(sbi->s_journal);
failed_mount2:
- kfree(sbi->s_bgi);
+ kfree(sbi->s_debts);
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index 6d53e5cac0ac..e9b4012cc776 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -19,15 +19,10 @@
#ifdef __KERNEL__
#include <linux/timer.h>
#include <linux/wait.h>
+#include <linux/blockgroup_lock.h>
+#include <linux/percpu_counter.h>
#endif
-struct ext3_bg_info {
- u8 bg_debts;
- spinlock_t bg_balloc_lock;
- spinlock_t bg_ialloc_lock;
- unsigned long bg_reserved;
-} ____cacheline_aligned_in_smp;
-
/*
* third extended-fs super-block data in memory
*/
@@ -57,7 +52,11 @@ struct ext3_sb_info {
u32 s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
- struct ext3_bg_info *s_bgi;
+ u8 *s_debts;
+ struct percpu_counter s_freeblocks_counter;
+ struct percpu_counter s_freeinodes_counter;
+ struct percpu_counter s_dirs_counter;
+ struct blockgroup_lock s_blockgroup_lock;
/* Journaling */
struct inode * s_journal_inode;