summaryrefslogtreecommitdiff
path: root/fs/btrfs/space-info.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/space-info.c')
-rw-r--r--fs/btrfs/space-info.c464
1 files changed, 245 insertions, 219 deletions
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 97452fb5d29b..6babbe333741 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -15,6 +15,7 @@
#include "accessors.h"
#include "extent-tree.h"
#include "zoned.h"
+#include "delayed-inode.h"
/*
* HOW DOES SPACE RESERVATION WORK
@@ -67,7 +68,7 @@
* Assume we are unable to simply make the reservation because we do not have
* enough space
*
- * -> __reserve_bytes
+ * -> reserve_bytes
* create a reserve_ticket with ->bytes set to our reservation, add it to
* the tail of space_info->tickets, kick async flush thread
*
@@ -172,15 +173,14 @@
* thing with or without extra unallocated space.
*/
-u64 __pure btrfs_space_info_used(const struct btrfs_space_info *s_info,
- bool may_use_included)
-{
- ASSERT(s_info);
- return s_info->bytes_used + s_info->bytes_reserved +
- s_info->bytes_pinned + s_info->bytes_readonly +
- s_info->bytes_zone_unusable +
- (may_use_included ? s_info->bytes_may_use : 0);
-}
+struct reserve_ticket {
+ u64 bytes;
+ int error;
+ bool steal;
+ struct list_head list;
+ wait_queue_head_t wait;
+ spinlock_t lock;
+};
/*
* after adding space to the filesystem, we need to clear the full flags
@@ -192,7 +192,7 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
struct btrfs_space_info *found;
list_for_each_entry(found, head, list)
- found->full = 0;
+ found->full = false;
}
/*
@@ -211,7 +211,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
if (btrfs_is_zoned(fs_info))
return fs_info->zone_size;
- ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+ ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK, "flags=%llu", flags);
if (flags & BTRFS_BLOCK_GROUP_DATA)
return BTRFS_MAX_DATA_CHUNK_SIZE;
@@ -262,8 +262,9 @@ static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flag
struct btrfs_space_info *sub_group;
int ret;
- ASSERT(parent->subgroup_id == BTRFS_SUB_GROUP_PRIMARY);
- ASSERT(id != BTRFS_SUB_GROUP_PRIMARY);
+ ASSERT(parent->subgroup_id == BTRFS_SUB_GROUP_PRIMARY,
+ "parent->subgroup_id=%d", parent->subgroup_id);
+ ASSERT(id != BTRFS_SUB_GROUP_PRIMARY, "id=%d", id);
sub_group = kzalloc(sizeof(*sub_group), GFP_NOFS);
if (!sub_group)
@@ -274,7 +275,7 @@ static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flag
sub_group->parent = parent;
sub_group->subgroup_id = id;
- ret = btrfs_sysfs_add_space_info_type(fs_info, sub_group);
+ ret = btrfs_sysfs_add_space_info_type(sub_group);
if (ret) {
kfree(sub_group);
parent->sub_group[index] = NULL;
@@ -308,7 +309,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
return ret;
}
- ret = btrfs_sysfs_add_space_info_type(info, space_info);
+ ret = btrfs_sysfs_add_space_info_type(space_info);
if (ret)
return ret;
@@ -372,8 +373,8 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
space_info->bytes_readonly += block_group->bytes_super;
btrfs_space_info_update_bytes_zone_unusable(space_info, block_group->zone_unusable);
if (block_group->length > 0)
- space_info->full = 0;
- btrfs_try_granting_tickets(info, space_info);
+ space_info->full = false;
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);
block_group->space_info = space_info;
@@ -421,10 +422,10 @@ static u64 calc_effective_data_chunk_size(struct btrfs_fs_info *fs_info)
return min_t(u64, data_chunk_size, SZ_1G);
}
-static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
- const struct btrfs_space_info *space_info,
- enum btrfs_reserve_flush_enum flush)
+static u64 calc_available_free_space(const struct btrfs_space_info *space_info,
+ enum btrfs_reserve_flush_enum flush)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
u64 profile;
u64 avail;
u64 data_chunk_size;
@@ -490,44 +491,77 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
return avail;
}
-int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
- const struct btrfs_space_info *space_info, u64 bytes,
- enum btrfs_reserve_flush_enum flush)
+static inline bool check_can_overcommit(const struct btrfs_space_info *space_info,
+ u64 space_info_used_bytes, u64 bytes,
+ enum btrfs_reserve_flush_enum flush)
+{
+ const u64 avail = calc_available_free_space(space_info, flush);
+
+ return (space_info_used_bytes + bytes < space_info->total_bytes + avail);
+}
+
+static inline bool can_overcommit(const struct btrfs_space_info *space_info,
+ u64 space_info_used_bytes, u64 bytes,
+ enum btrfs_reserve_flush_enum flush)
+{
+ /* Don't overcommit when in mixed mode. */
+ if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
+ return false;
+
+ return check_can_overcommit(space_info, space_info_used_bytes, bytes, flush);
+}
+
+bool btrfs_can_overcommit(const struct btrfs_space_info *space_info, u64 bytes,
+ enum btrfs_reserve_flush_enum flush)
{
- u64 avail;
u64 used;
/* Don't overcommit when in mixed mode */
if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
- return 0;
+ return false;
used = btrfs_space_info_used(space_info, true);
- avail = calc_available_free_space(fs_info, space_info, flush);
- if (used + bytes < space_info->total_bytes + avail)
- return 1;
- return 0;
+ return check_can_overcommit(space_info, used, bytes, flush);
}
static void remove_ticket(struct btrfs_space_info *space_info,
- struct reserve_ticket *ticket)
+ struct reserve_ticket *ticket, int error)
{
+ lockdep_assert_held(&space_info->lock);
+
if (!list_empty(&ticket->list)) {
list_del_init(&ticket->list);
- ASSERT(space_info->reclaim_size >= ticket->bytes);
+ ASSERT(space_info->reclaim_size >= ticket->bytes,
+ "space_info->reclaim_size=%llu ticket->bytes=%llu",
+ space_info->reclaim_size, ticket->bytes);
space_info->reclaim_size -= ticket->bytes;
}
+
+ spin_lock(&ticket->lock);
+ /*
+ * If we are called from a task waiting on the ticket, it may happen
+ * that before it sets an error on the ticket, a reclaim task was able
+ * to satisfy the ticket. In that case ignore the error.
+ */
+ if (error && ticket->bytes > 0)
+ ticket->error = error;
+ else
+ ticket->bytes = 0;
+
+ wake_up(&ticket->wait);
+ spin_unlock(&ticket->lock);
}
/*
* This is for space we already have accounted in space_info->bytes_may_use, so
* basically when we're returning space from block_rsv's.
*/
-void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info)
+void btrfs_try_granting_tickets(struct btrfs_space_info *space_info)
{
struct list_head *head;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
+ u64 used = btrfs_space_info_used(space_info, true);
lockdep_assert_held(&space_info->lock);
@@ -535,19 +569,18 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
again:
while (!list_empty(head)) {
struct reserve_ticket *ticket;
- u64 used = btrfs_space_info_used(space_info, true);
+ u64 used_after;
ticket = list_first_entry(head, struct reserve_ticket, list);
+ used_after = used + ticket->bytes;
/* Check and see if our ticket can be satisfied now. */
- if ((used + ticket->bytes <= space_info->total_bytes) ||
- btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
- flush)) {
+ if (used_after <= space_info->total_bytes ||
+ can_overcommit(space_info, used, ticket->bytes, flush)) {
btrfs_space_info_update_bytes_may_use(space_info, ticket->bytes);
- remove_ticket(space_info, ticket);
- ticket->bytes = 0;
+ remove_ticket(space_info, ticket, 0);
space_info->tickets_id++;
- wake_up(&ticket->wait);
+ used = used_after;
} else {
break;
}
@@ -594,9 +627,9 @@ static void dump_global_block_rsv(struct btrfs_fs_info *fs_info)
DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
}
-static void __btrfs_dump_space_info(const struct btrfs_fs_info *fs_info,
- const struct btrfs_space_info *info)
+static void __btrfs_dump_space_info(const struct btrfs_space_info *info)
{
+ const struct btrfs_fs_info *fs_info = info->fs_info;
const char *flag_str = space_info_flag_to_str(info);
lockdep_assert_held(&info->lock);
@@ -613,16 +646,16 @@ static void __btrfs_dump_space_info(const struct btrfs_fs_info *fs_info,
info->bytes_readonly, info->bytes_zone_unusable);
}
-void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *info, u64 bytes,
+void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
bool dump_block_groups)
{
+ struct btrfs_fs_info *fs_info = info->fs_info;
struct btrfs_block_group *cache;
u64 total_avail = 0;
int index = 0;
spin_lock(&info->lock);
- __btrfs_dump_space_info(fs_info, info);
+ __btrfs_dump_space_info(info);
dump_global_block_rsv(fs_info);
spin_unlock(&info->lock);
@@ -670,11 +703,11 @@ static inline u64 calc_reclaim_items_nr(const struct btrfs_fs_info *fs_info,
/*
* shrink metadata reservation for delalloc
*/
-static void shrink_delalloc(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
+static void shrink_delalloc(struct btrfs_space_info *space_info,
u64 to_reclaim, bool wait_ordered,
bool for_preempt)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
struct btrfs_trans_handle *trans;
u64 delalloc_bytes;
u64 ordered_bytes;
@@ -801,10 +834,10 @@ skip_async:
* and may fail for various reasons. The caller is supposed to examine the
* state of @space_info to detect the outcome.
*/
-static void flush_space(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info, u64 num_bytes,
- enum btrfs_flush_state state, bool for_preempt)
+static void flush_space(struct btrfs_space_info *space_info, u64 num_bytes,
+ enum btrfs_flush_state state, bool for_preempt)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_trans_handle *trans;
int nr;
@@ -833,7 +866,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
case FLUSH_DELALLOC_FULL:
if (state == FLUSH_DELALLOC_FULL)
num_bytes = U64_MAX;
- shrink_delalloc(fs_info, space_info, num_bytes,
+ shrink_delalloc(space_info, num_bytes,
state != FLUSH_DELALLOC, for_preempt);
break;
case FLUSH_DELAYED_REFS_NR:
@@ -900,8 +933,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
return;
}
-static u64 btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
- const struct btrfs_space_info *space_info)
+static u64 btrfs_calc_reclaim_metadata_size(const struct btrfs_space_info *space_info)
{
u64 used;
u64 avail;
@@ -909,8 +941,7 @@ static u64 btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
lockdep_assert_held(&space_info->lock);
- avail = calc_available_free_space(fs_info, space_info,
- BTRFS_RESERVE_FLUSH_ALL);
+ avail = calc_available_free_space(space_info, BTRFS_RESERVE_FLUSH_ALL);
used = btrfs_space_info_used(space_info, true);
/*
@@ -925,18 +956,25 @@ static u64 btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
return to_reclaim;
}
-static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
- const struct btrfs_space_info *space_info)
+static bool need_preemptive_reclaim(const struct btrfs_space_info *space_info)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv);
u64 ordered, delalloc;
u64 thresh;
u64 used;
- thresh = mult_perc(space_info->total_bytes, 90);
-
lockdep_assert_held(&space_info->lock);
+ /*
+ * We have tickets queued, bail so we don't compete with the async
+ * flushers.
+ */
+ if (space_info->reclaim_size)
+ return false;
+
+ thresh = mult_perc(space_info->total_bytes, 90);
+
/* If we're just plain full then async reclaim just slows us down. */
if ((space_info->bytes_used + space_info->bytes_reserved +
global_rsv_size) >= thresh)
@@ -957,13 +995,6 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
return false;
/*
- * We have tickets queued, bail so we don't compete with the async
- * flushers.
- */
- if (space_info->reclaim_size)
- return false;
-
- /*
* If we have over half of the free space occupied by reservations or
* pinned then we want to start flushing.
*
@@ -992,8 +1023,7 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
* much delalloc we need for the background flusher to kick in.
*/
- thresh = calc_available_free_space(fs_info, space_info,
- BTRFS_RESERVE_FLUSH_ALL);
+ thresh = calc_available_free_space(space_info, BTRFS_RESERVE_FLUSH_ALL);
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_readonly + global_rsv_size;
if (used < space_info->total_bytes)
@@ -1037,13 +1067,15 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
}
-static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
+static bool steal_from_global_rsv(struct btrfs_space_info *space_info,
struct reserve_ticket *ticket)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
u64 min_bytes;
+ lockdep_assert_held(&space_info->lock);
+
if (!ticket->steal)
return false;
@@ -1057,21 +1089,19 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
return false;
}
global_rsv->reserved -= ticket->bytes;
- remove_ticket(space_info, ticket);
- ticket->bytes = 0;
- wake_up(&ticket->wait);
- space_info->tickets_id++;
if (global_rsv->reserved < global_rsv->size)
- global_rsv->full = 0;
+ global_rsv->full = false;
spin_unlock(&global_rsv->lock);
+ remove_ticket(space_info, ticket, 0);
+ space_info->tickets_id++;
+
return true;
}
/*
* We've exhausted our flushing, start failing tickets.
*
- * @fs_info - fs_info for this fs
* @space_info - the space info we were flushing
*
* We call this when we've exhausted our flushing ability and haven't made
@@ -1084,47 +1114,44 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
* other tickets, or if it stumbles across a ticket that was smaller than the
* first ticket.
*/
-static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info)
+static bool maybe_fail_all_tickets(struct btrfs_space_info *space_info)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
struct reserve_ticket *ticket;
u64 tickets_id = space_info->tickets_id;
- const bool aborted = BTRFS_FS_ERROR(fs_info);
+ const int abort_error = BTRFS_FS_ERROR(fs_info);
trace_btrfs_fail_all_tickets(fs_info, space_info);
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
btrfs_info(fs_info, "cannot satisfy tickets, dumping space info");
- __btrfs_dump_space_info(fs_info, space_info);
+ __btrfs_dump_space_info(space_info);
}
while (!list_empty(&space_info->tickets) &&
tickets_id == space_info->tickets_id) {
ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
+ if (unlikely(abort_error)) {
+ remove_ticket(space_info, ticket, abort_error);
+ } else {
+ if (steal_from_global_rsv(space_info, ticket))
+ return true;
- if (!aborted && steal_from_global_rsv(fs_info, space_info, ticket))
- return true;
-
- if (!aborted && btrfs_test_opt(fs_info, ENOSPC_DEBUG))
- btrfs_info(fs_info, "failing ticket with %llu bytes",
- ticket->bytes);
+ if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
+ btrfs_info(fs_info, "failing ticket with %llu bytes",
+ ticket->bytes);
- remove_ticket(space_info, ticket);
- if (aborted)
- ticket->error = -EIO;
- else
- ticket->error = -ENOSPC;
- wake_up(&ticket->wait);
+ remove_ticket(space_info, ticket, -ENOSPC);
- /*
- * We're just throwing tickets away, so more flushing may not
- * trip over btrfs_try_granting_tickets, so we need to call it
- * here to see if we can make progress with the next ticket in
- * the list.
- */
- if (!aborted)
- btrfs_try_granting_tickets(fs_info, space_info);
+ /*
+ * We're just throwing tickets away, so more flushing may
+ * not trip over btrfs_try_granting_tickets, so we need
+ * to call it here to see if we can make progress with
+ * the next ticket in the list.
+ */
+ btrfs_try_granting_tickets(space_info);
+ }
}
return (tickets_id != space_info->tickets_id);
}
@@ -1144,9 +1171,9 @@ static void do_async_reclaim_metadata_space(struct btrfs_space_info *space_info)
final_state = COMMIT_TRANS;
spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(space_info);
if (!to_reclaim) {
- space_info->flush = 0;
+ space_info->flush = false;
spin_unlock(&space_info->lock);
return;
}
@@ -1155,15 +1182,14 @@ static void do_async_reclaim_metadata_space(struct btrfs_space_info *space_info)
flush_state = FLUSH_DELAYED_ITEMS_NR;
do {
- flush_space(fs_info, space_info, to_reclaim, flush_state, false);
+ flush_space(space_info, to_reclaim, flush_state, false);
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
- space_info->flush = 0;
+ space_info->flush = false;
spin_unlock(&space_info->lock);
return;
}
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
- space_info);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(space_info);
if (last_tickets_id == space_info->tickets_id) {
flush_state++;
} else {
@@ -1197,11 +1223,11 @@ static void do_async_reclaim_metadata_space(struct btrfs_space_info *space_info)
if (flush_state > final_state) {
commit_cycles++;
if (commit_cycles > 2) {
- if (maybe_fail_all_tickets(fs_info, space_info)) {
+ if (maybe_fail_all_tickets(space_info)) {
flush_state = FLUSH_DELAYED_ITEMS_NR;
commit_cycles--;
} else {
- space_info->flush = 0;
+ space_info->flush = false;
}
} else {
flush_state = FLUSH_DELAYED_ITEMS_NR;
@@ -1257,14 +1283,15 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
trans_rsv = &fs_info->trans_block_rsv;
spin_lock(&space_info->lock);
- while (need_preemptive_reclaim(fs_info, space_info)) {
+ while (need_preemptive_reclaim(space_info)) {
enum btrfs_flush_state flush;
u64 delalloc_size = 0;
u64 to_reclaim, block_rsv_size;
const u64 global_rsv_size = btrfs_block_rsv_reserved(global_rsv);
+ const u64 bytes_may_use = space_info->bytes_may_use;
+ const u64 bytes_pinned = space_info->bytes_pinned;
- loops++;
-
+ spin_unlock(&space_info->lock);
/*
* We don't have a precise counter for the metadata being
* reserved for delalloc, so we'll approximate it by subtracting
@@ -1276,8 +1303,8 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
btrfs_block_rsv_reserved(delayed_block_rsv) +
btrfs_block_rsv_reserved(delayed_refs_rsv) +
btrfs_block_rsv_reserved(trans_rsv);
- if (block_rsv_size < space_info->bytes_may_use)
- delalloc_size = space_info->bytes_may_use - block_rsv_size;
+ if (block_rsv_size < bytes_may_use)
+ delalloc_size = bytes_may_use - block_rsv_size;
/*
* We don't want to include the global_rsv in our calculation,
@@ -1294,10 +1321,10 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
if (delalloc_size > block_rsv_size) {
to_reclaim = delalloc_size;
flush = FLUSH_DELALLOC;
- } else if (space_info->bytes_pinned >
+ } else if (bytes_pinned >
(btrfs_block_rsv_reserved(delayed_block_rsv) +
btrfs_block_rsv_reserved(delayed_refs_rsv))) {
- to_reclaim = space_info->bytes_pinned;
+ to_reclaim = bytes_pinned;
flush = COMMIT_TRANS;
} else if (btrfs_block_rsv_reserved(delayed_block_rsv) >
btrfs_block_rsv_reserved(delayed_refs_rsv)) {
@@ -1308,7 +1335,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
flush = FLUSH_DELAYED_REFS_NR;
}
- spin_unlock(&space_info->lock);
+ loops++;
/*
* We don't want to reclaim everything, just a portion, so scale
@@ -1318,7 +1345,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
to_reclaim >>= 2;
if (!to_reclaim)
to_reclaim = btrfs_calc_insert_metadata_size(fs_info, 1);
- flush_space(fs_info, space_info, to_reclaim, flush, true);
+ flush_space(space_info, to_reclaim, flush, true);
cond_resched();
spin_lock(&space_info->lock);
}
@@ -1383,7 +1410,7 @@ static void do_async_reclaim_data_space(struct btrfs_space_info *space_info)
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
- space_info->flush = 0;
+ space_info->flush = false;
spin_unlock(&space_info->lock);
return;
}
@@ -1391,27 +1418,27 @@ static void do_async_reclaim_data_space(struct btrfs_space_info *space_info)
spin_unlock(&space_info->lock);
while (!space_info->full) {
- flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
+ flush_space(space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
- space_info->flush = 0;
+ space_info->flush = false;
spin_unlock(&space_info->lock);
return;
}
/* Something happened, fail everything and bail. */
- if (BTRFS_FS_ERROR(fs_info))
+ if (unlikely(BTRFS_FS_ERROR(fs_info)))
goto aborted_fs;
last_tickets_id = space_info->tickets_id;
spin_unlock(&space_info->lock);
}
while (flush_state < ARRAY_SIZE(data_flush_states)) {
- flush_space(fs_info, space_info, U64_MAX,
+ flush_space(space_info, U64_MAX,
data_flush_states[flush_state], false);
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
- space_info->flush = 0;
+ space_info->flush = false;
spin_unlock(&space_info->lock);
return;
}
@@ -1425,16 +1452,16 @@ static void do_async_reclaim_data_space(struct btrfs_space_info *space_info)
if (flush_state >= ARRAY_SIZE(data_flush_states)) {
if (space_info->full) {
- if (maybe_fail_all_tickets(fs_info, space_info))
+ if (maybe_fail_all_tickets(space_info))
flush_state = 0;
else
- space_info->flush = 0;
+ space_info->flush = false;
} else {
flush_state = 0;
}
/* Something happened, fail everything and bail. */
- if (BTRFS_FS_ERROR(fs_info))
+ if (unlikely(BTRFS_FS_ERROR(fs_info)))
goto aborted_fs;
}
@@ -1443,8 +1470,8 @@ static void do_async_reclaim_data_space(struct btrfs_space_info *space_info)
return;
aborted_fs:
- maybe_fail_all_tickets(fs_info, space_info);
- space_info->flush = 0;
+ maybe_fail_all_tickets(space_info);
+ space_info->flush = false;
spin_unlock(&space_info->lock);
}
@@ -1489,40 +1516,47 @@ static const enum btrfs_flush_state evict_flush_states[] = {
RESET_ZONES,
};
-static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- struct reserve_ticket *ticket,
- const enum btrfs_flush_state *states,
- int states_nr)
+static bool is_ticket_served(struct reserve_ticket *ticket)
{
+ bool ret;
+
+ spin_lock(&ticket->lock);
+ ret = (ticket->bytes == 0);
+ spin_unlock(&ticket->lock);
+
+ return ret;
+}
+
+static void priority_reclaim_metadata_space(struct btrfs_space_info *space_info,
+ struct reserve_ticket *ticket,
+ const enum btrfs_flush_state *states,
+ int states_nr)
+{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
u64 to_reclaim;
int flush_state = 0;
- spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
/*
* This is the priority reclaim path, so to_reclaim could be >0 still
* because we may have only satisfied the priority tickets and still
* left non priority tickets on the list. We would then have
* to_reclaim but ->bytes == 0.
*/
- if (ticket->bytes == 0) {
- spin_unlock(&space_info->lock);
+ if (is_ticket_served(ticket))
return;
- }
+
+ spin_lock(&space_info->lock);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(space_info);
+ spin_unlock(&space_info->lock);
while (flush_state < states_nr) {
- spin_unlock(&space_info->lock);
- flush_space(fs_info, space_info, to_reclaim, states[flush_state],
- false);
- flush_state++;
- spin_lock(&space_info->lock);
- if (ticket->bytes == 0) {
- spin_unlock(&space_info->lock);
+ flush_space(space_info, to_reclaim, states[flush_state], false);
+ if (is_ticket_served(ticket))
return;
- }
+ flush_state++;
}
+ spin_lock(&space_info->lock);
/*
* Attempt to steal from the global rsv if we can, except if the fs was
* turned into error mode due to a transaction abort when flushing space
@@ -1531,48 +1565,38 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
* just to have caller fail immediately instead of later when trying to
* modify the fs, making it easier to debug -ENOSPC problems.
*/
- if (BTRFS_FS_ERROR(fs_info)) {
- ticket->error = BTRFS_FS_ERROR(fs_info);
- remove_ticket(space_info, ticket);
- } else if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
- ticket->error = -ENOSPC;
- remove_ticket(space_info, ticket);
- }
+ if (unlikely(BTRFS_FS_ERROR(fs_info)))
+ remove_ticket(space_info, ticket, BTRFS_FS_ERROR(fs_info));
+ else if (!steal_from_global_rsv(space_info, ticket))
+ remove_ticket(space_info, ticket, -ENOSPC);
/*
* We must run try_granting_tickets here because we could be a large
* ticket in front of a smaller ticket that can now be satisfied with
* the available space.
*/
- btrfs_try_granting_tickets(fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);
}
-static void priority_reclaim_data_space(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
+static void priority_reclaim_data_space(struct btrfs_space_info *space_info,
struct reserve_ticket *ticket)
{
- spin_lock(&space_info->lock);
-
/* We could have been granted before we got here. */
- if (ticket->bytes == 0) {
- spin_unlock(&space_info->lock);
+ if (is_ticket_served(ticket))
return;
- }
+ spin_lock(&space_info->lock);
while (!space_info->full) {
spin_unlock(&space_info->lock);
- flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
- spin_lock(&space_info->lock);
- if (ticket->bytes == 0) {
- spin_unlock(&space_info->lock);
+ flush_space(space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
+ if (is_ticket_served(ticket))
return;
- }
+ spin_lock(&space_info->lock);
}
- ticket->error = -ENOSPC;
- remove_ticket(space_info, ticket);
- btrfs_try_granting_tickets(fs_info, space_info);
+ remove_ticket(space_info, ticket, -ENOSPC);
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);
}
@@ -1581,11 +1605,13 @@ static void wait_reserve_ticket(struct btrfs_space_info *space_info,
{
DEFINE_WAIT(wait);
- int ret = 0;
- spin_lock(&space_info->lock);
+ spin_lock(&ticket->lock);
while (ticket->bytes > 0 && ticket->error == 0) {
+ int ret;
+
ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
+ spin_unlock(&ticket->lock);
if (ret) {
/*
* Delete us from the list. After we unlock the space
@@ -1595,24 +1621,23 @@ static void wait_reserve_ticket(struct btrfs_space_info *space_info,
* despite getting an error, resulting in a space leak
* (bytes_may_use counter of our space_info).
*/
- remove_ticket(space_info, ticket);
- ticket->error = -EINTR;
- break;
+ spin_lock(&space_info->lock);
+ remove_ticket(space_info, ticket, -EINTR);
+ spin_unlock(&space_info->lock);
+ return;
}
- spin_unlock(&space_info->lock);
schedule();
finish_wait(&ticket->wait, &wait);
- spin_lock(&space_info->lock);
+ spin_lock(&ticket->lock);
}
- spin_unlock(&space_info->lock);
+ spin_unlock(&ticket->lock);
}
/*
* Do the appropriate flushing and waiting for a ticket.
*
- * @fs_info: the filesystem
* @space_info: space info for the reservation
* @ticket: ticket for the reservation
* @start_ns: timestamp when the reservation started
@@ -1622,8 +1647,7 @@ static void wait_reserve_ticket(struct btrfs_space_info *space_info,
* This does the work of figuring out how to flush for the ticket, waiting for
* the reservation, and returning the appropriate error if there is one.
*/
-static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
+static int handle_reserve_ticket(struct btrfs_space_info *space_info,
struct reserve_ticket *ticket,
u64 start_ns, u64 orig_bytes,
enum btrfs_reserve_flush_enum flush)
@@ -1637,20 +1661,20 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
wait_reserve_ticket(space_info, ticket);
break;
case BTRFS_RESERVE_FLUSH_LIMIT:
- priority_reclaim_metadata_space(fs_info, space_info, ticket,
+ priority_reclaim_metadata_space(space_info, ticket,
priority_flush_states,
ARRAY_SIZE(priority_flush_states));
break;
case BTRFS_RESERVE_FLUSH_EVICT:
- priority_reclaim_metadata_space(fs_info, space_info, ticket,
+ priority_reclaim_metadata_space(space_info, ticket,
evict_flush_states,
ARRAY_SIZE(evict_flush_states));
break;
case BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE:
- priority_reclaim_data_space(fs_info, space_info, ticket);
+ priority_reclaim_data_space(space_info, ticket);
break;
default:
- ASSERT(0);
+ ASSERT(0, "flush=%d", flush);
break;
}
@@ -1662,9 +1686,10 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
* releasing reserved space (if an error happens the expectation is that
* space wasn't reserved at all).
*/
- ASSERT(!(ticket->bytes == 0 && ticket->error));
- trace_btrfs_reserve_ticket(fs_info, space_info->flags, orig_bytes,
- start_ns, flush, ticket->error);
+ ASSERT(!(ticket->bytes == 0 && ticket->error),
+ "ticket->bytes=%llu ticket->error=%d", ticket->bytes, ticket->error);
+ trace_btrfs_reserve_ticket(space_info->fs_info, space_info->flags,
+ orig_bytes, start_ns, flush, ticket->error);
return ret;
}
@@ -1678,9 +1703,9 @@ static inline bool is_normal_flushing(enum btrfs_reserve_flush_enum flush)
(flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
}
-static inline void maybe_clamp_preempt(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info)
+static inline void maybe_clamp_preempt(struct btrfs_space_info *space_info)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
u64 ordered = percpu_counter_sum_positive(&fs_info->ordered_bytes);
u64 delalloc = percpu_counter_sum_positive(&fs_info->delalloc_bytes);
@@ -1715,7 +1740,6 @@ static inline bool can_ticket(enum btrfs_reserve_flush_enum flush)
/*
* Try to reserve bytes from the block_rsv's space.
*
- * @fs_info: the filesystem
* @space_info: space info we want to allocate from
* @orig_bytes: number of bytes we want
* @flush: whether or not we can flush to make our reservation
@@ -1727,10 +1751,10 @@ static inline bool can_ticket(enum btrfs_reserve_flush_enum flush)
* regain reservations will be made and this will fail if there is not enough
* space already.
*/
-static int __reserve_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info, u64 orig_bytes,
- enum btrfs_reserve_flush_enum flush)
+static int reserve_bytes(struct btrfs_space_info *space_info, u64 orig_bytes,
+ enum btrfs_reserve_flush_enum flush)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
struct work_struct *async_work;
struct reserve_ticket ticket;
u64 start_ns = 0;
@@ -1738,7 +1762,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
int ret = -ENOSPC;
bool pending_tickets;
- ASSERT(orig_bytes);
+ ASSERT(orig_bytes, "orig_bytes=%llu", orig_bytes);
/*
* If have a transaction handle (current->journal_info != NULL), then
* the flush method can not be neither BTRFS_RESERVE_FLUSH_ALL* nor
@@ -1747,9 +1771,9 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
*/
if (current->journal_info) {
/* One assert per line for easier debugging. */
- ASSERT(flush != BTRFS_RESERVE_FLUSH_ALL);
- ASSERT(flush != BTRFS_RESERVE_FLUSH_ALL_STEAL);
- ASSERT(flush != BTRFS_RESERVE_FLUSH_EVICT);
+ ASSERT(flush != BTRFS_RESERVE_FLUSH_ALL, "flush=%d", flush);
+ ASSERT(flush != BTRFS_RESERVE_FLUSH_ALL_STEAL, "flush=%d", flush);
+ ASSERT(flush != BTRFS_RESERVE_FLUSH_EVICT, "flush=%d", flush);
}
if (flush == BTRFS_RESERVE_FLUSH_DATA)
@@ -1777,7 +1801,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
*/
if (!pending_tickets &&
((used + orig_bytes <= space_info->total_bytes) ||
- btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
+ can_overcommit(space_info, used, orig_bytes, flush))) {
btrfs_space_info_update_bytes_may_use(space_info, orig_bytes);
ret = 0;
}
@@ -1788,7 +1812,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
* left to allocate for the block.
*/
if (ret && unlikely(flush == BTRFS_RESERVE_FLUSH_EMERGENCY)) {
- used = btrfs_space_info_used(space_info, false);
+ used -= space_info->bytes_may_use;
if (used + orig_bytes <= space_info->total_bytes) {
btrfs_space_info_update_bytes_may_use(space_info, orig_bytes);
ret = 0;
@@ -1807,6 +1831,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
ticket.error = 0;
space_info->reclaim_size += ticket.bytes;
init_waitqueue_head(&ticket.wait);
+ spin_lock_init(&ticket.lock);
ticket.steal = can_steal(flush);
if (trace_btrfs_reserve_ticket_enabled())
start_ns = ktime_get_ns();
@@ -1823,9 +1848,9 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
* preemptive flushing in order to keep up with
* the workload.
*/
- maybe_clamp_preempt(fs_info, space_info);
+ maybe_clamp_preempt(space_info);
- space_info->flush = 1;
+ space_info->flush = true;
trace_btrfs_trigger_flush(fs_info,
space_info->flags,
orig_bytes, flush,
@@ -1844,7 +1869,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
*/
if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
!work_busy(&fs_info->preempt_reclaim_work) &&
- need_preemptive_reclaim(fs_info, space_info)) {
+ need_preemptive_reclaim(space_info)) {
trace_btrfs_trigger_flush(fs_info, space_info->flags,
orig_bytes, flush, "preempt");
queue_work(system_dfl_wq,
@@ -1855,14 +1880,12 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
if (!ret || !can_ticket(flush))
return ret;
- return handle_reserve_ticket(fs_info, space_info, &ticket, start_ns,
- orig_bytes, flush);
+ return handle_reserve_ticket(space_info, &ticket, start_ns, orig_bytes, flush);
}
/*
* Try to reserve metadata bytes from the block_rsv's space.
*
- * @fs_info: the filesystem
* @space_info: the space_info we're allocating for
* @orig_bytes: number of bytes we want
* @flush: whether or not we can flush to make our reservation
@@ -1874,20 +1897,21 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
* regain reservations will be made and this will fail if there is not enough
* space already.
*/
-int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
+int btrfs_reserve_metadata_bytes(struct btrfs_space_info *space_info,
u64 orig_bytes,
enum btrfs_reserve_flush_enum flush)
{
int ret;
- ret = __reserve_bytes(fs_info, space_info, orig_bytes, flush);
+ ret = reserve_bytes(space_info, orig_bytes, flush);
if (ret == -ENOSPC) {
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
+
trace_btrfs_space_reservation(fs_info, "space_info:enospc",
space_info->flags, orig_bytes, 1);
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
- btrfs_dump_space_info(fs_info, space_info, orig_bytes, false);
+ btrfs_dump_space_info(space_info, orig_bytes, false);
}
return ret;
}
@@ -1895,7 +1919,7 @@ int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
/*
* Try to reserve data bytes for an allocation.
*
- * @fs_info: the filesystem
+ * @space_info: the space_info we're allocating for
* @bytes: number of bytes we need
* @flush: how we are allowed to flush
*
@@ -1910,15 +1934,17 @@ int btrfs_reserve_data_bytes(struct btrfs_space_info *space_info, u64 bytes,
ASSERT(flush == BTRFS_RESERVE_FLUSH_DATA ||
flush == BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE ||
- flush == BTRFS_RESERVE_NO_FLUSH);
- ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA);
+ flush == BTRFS_RESERVE_NO_FLUSH, "flush=%d", flush);
+ ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA,
+ "current->journal_info=0x%lx flush=%d",
+ (unsigned long)current->journal_info, flush);
- ret = __reserve_bytes(fs_info, space_info, bytes, flush);
+ ret = reserve_bytes(space_info, bytes, flush);
if (ret == -ENOSPC) {
trace_btrfs_space_reservation(fs_info, "space_info:enospc",
space_info->flags, bytes, 1);
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
- btrfs_dump_space_info(fs_info, space_info, bytes, false);
+ btrfs_dump_space_info(space_info, bytes, false);
}
return ret;
}
@@ -1931,7 +1957,7 @@ __cold void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info)
btrfs_info(fs_info, "dumping space info:");
list_for_each_entry(space_info, &fs_info->space_info, list) {
spin_lock(&space_info->lock);
- __btrfs_dump_space_info(fs_info, space_info);
+ __btrfs_dump_space_info(space_info);
spin_unlock(&space_info->lock);
}
dump_global_block_rsv(fs_info);
@@ -1948,7 +1974,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
int factor;
/* It's df, we don't care if it's racy */
- if (list_empty(&sinfo->ro_bgs))
+ if (data_race(list_empty(&sinfo->ro_bgs)))
return 0;
spin_lock(&sinfo->lock);
@@ -2187,7 +2213,7 @@ void btrfs_return_free_space(struct btrfs_space_info *space_info, u64 len)
global_rsv->reserved += to_add;
btrfs_space_info_update_bytes_may_use(space_info, to_add);
if (global_rsv->reserved >= global_rsv->size)
- global_rsv->full = 1;
+ global_rsv->full = true;
len -= to_add;
}
spin_unlock(&global_rsv->lock);
@@ -2195,5 +2221,5 @@ void btrfs_return_free_space(struct btrfs_space_info *space_info, u64 len)
grant:
/* Add to any tickets we may have. */
if (len)
- btrfs_try_granting_tickets(fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
}