Diffstat (limited to 'fs/bcachefs/recovery_passes.c')
-rw-r--r--	fs/bcachefs/recovery_passes.c	| 646
1 file changed, 0 insertions, 646 deletions
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
deleted file mode 100644
index 6a039e011064..000000000000
--- a/fs/bcachefs/recovery_passes.c
+++ /dev/null
@@ -1,646 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include "bcachefs.h"
-#include "alloc_background.h"
-#include "backpointers.h"
-#include "btree_gc.h"
-#include "btree_node_scan.h"
-#include "disk_accounting.h"
-#include "ec.h"
-#include "fsck.h"
-#include "inode.h"
-#include "journal.h"
-#include "lru.h"
-#include "logged_ops.h"
-#include "movinggc.h"
-#include "rebalance.h"
-#include "recovery.h"
-#include "recovery_passes.h"
-#include "snapshot.h"
-#include "subvolume.h"
-#include "super.h"
-#include "super-io.h"
-
-const char * const bch2_recovery_passes[] = {
-#define x(_fn, ...)	#_fn,
-	BCH_RECOVERY_PASSES()
-#undef x
-	NULL
-};
-
-static const u8 passes_to_stable_map[] = {
-#define x(n, id, ...)	[BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
-	BCH_RECOVERY_PASSES()
-#undef x
-};
-
-static const u8 passes_from_stable_map[] = {
-#define x(n, id, ...)	[BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
-	BCH_RECOVERY_PASSES()
-#undef x
-};
-
-static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
-{
-	return passes_to_stable_map[pass];
-}
-
-u64 bch2_recovery_passes_to_stable(u64 v)
-{
-	u64 ret = 0;
-	for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
-		if (v & BIT_ULL(i))
-			ret |= BIT_ULL(passes_to_stable_map[i]);
-	return ret;
-}
-
-static enum bch_recovery_pass bch2_recovery_pass_from_stable(enum bch_recovery_pass_stable pass)
-{
-	return pass < ARRAY_SIZE(passes_from_stable_map)
-		? passes_from_stable_map[pass]
-		: 0;
-}
-
-u64 bch2_recovery_passes_from_stable(u64 v)
-{
-	u64 ret = 0;
-	for (unsigned i = 0; i < ARRAY_SIZE(passes_from_stable_map); i++)
-		if (v & BIT_ULL(i))
-			ret |= BIT_ULL(passes_from_stable_map[i]);
-	return ret;
-}
-
-static int bch2_sb_recovery_passes_validate(struct bch_sb *sb, struct bch_sb_field *f,
-					    enum bch_validate_flags flags, struct printbuf *err)
-{
-	return 0;
-}
-
-static void bch2_sb_recovery_passes_to_text(struct printbuf *out,
-					    struct bch_sb *sb,
-					    struct bch_sb_field *f)
-{
-	struct bch_sb_field_recovery_passes *r =
-		field_to_type(f, recovery_passes);
-	unsigned nr = recovery_passes_nr_entries(r);
-
-	if (out->nr_tabstops < 1)
-		printbuf_tabstop_push(out, 32);
-	if (out->nr_tabstops < 2)
-		printbuf_tabstop_push(out, 16);
-
-	prt_printf(out, "Pass\tLast run\tLast runtime\n");
-
-	for (struct recovery_pass_entry *i = r->start; i < r->start + nr; i++) {
-		if (!i->last_run)
-			continue;
-
-		unsigned idx = i - r->start;
-
-		prt_printf(out, "%s\t", bch2_recovery_passes[bch2_recovery_pass_from_stable(idx)]);
-
-		bch2_prt_datetime(out, le64_to_cpu(i->last_run));
-		prt_tab(out);
-
-		bch2_pr_time_units(out, le32_to_cpu(i->last_runtime) * NSEC_PER_SEC);
-
-		if (BCH_RECOVERY_PASS_NO_RATELIMIT(i))
-			prt_str(out, " (no ratelimit)");
-
-		prt_newline(out);
-	}
-}
-
-static struct recovery_pass_entry *bch2_sb_recovery_pass_entry(struct bch_fs *c,
-							       enum bch_recovery_pass pass)
-{
-	enum bch_recovery_pass_stable stable = bch2_recovery_pass_to_stable(pass);
-
-	lockdep_assert_held(&c->sb_lock);
-
-	struct bch_sb_field_recovery_passes *r =
-		bch2_sb_field_get(c->disk_sb.sb, recovery_passes);
-
-	if (stable >= recovery_passes_nr_entries(r)) {
-		unsigned u64s = struct_size(r, start, stable + 1) / sizeof(u64);
-
-		r = bch2_sb_field_resize(&c->disk_sb, recovery_passes, u64s);
-		if (!r) {
-			bch_err(c, "error creating recovery_passes sb section");
-			return NULL;
-		}
-	}
-
-	return r->start + stable;
-}
-
-static void bch2_sb_recovery_pass_complete(struct bch_fs *c,
-					   enum bch_recovery_pass pass,
-					   s64 start_time)
-{
-	guard(mutex)(&c->sb_lock);
-	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
-	__clear_bit_le64(bch2_recovery_pass_to_stable(pass),
-			 ext->recovery_passes_required);
-
-	struct recovery_pass_entry *e = bch2_sb_recovery_pass_entry(c, pass);
-	if (e) {
-		s64 end_time = ktime_get_real_seconds();
-		e->last_run	= cpu_to_le64(end_time);
-		e->last_runtime	= cpu_to_le32(max(0, end_time - start_time));
-		SET_BCH_RECOVERY_PASS_NO_RATELIMIT(e, false);
-	}
-
-	bch2_write_super(c);
-}
-
-void bch2_recovery_pass_set_no_ratelimit(struct bch_fs *c,
-					 enum bch_recovery_pass pass)
-{
-	guard(mutex)(&c->sb_lock);
-
-	struct recovery_pass_entry *e = bch2_sb_recovery_pass_entry(c, pass);
-	if (e && !BCH_RECOVERY_PASS_NO_RATELIMIT(e)) {
-		SET_BCH_RECOVERY_PASS_NO_RATELIMIT(e, true);
-		bch2_write_super(c);
-	}
-}
-
-static bool bch2_recovery_pass_want_ratelimit(struct bch_fs *c, enum bch_recovery_pass pass)
-{
-	enum bch_recovery_pass_stable stable = bch2_recovery_pass_to_stable(pass);
-	bool ret = false;
-
-	lockdep_assert_held(&c->sb_lock);
-
-	struct bch_sb_field_recovery_passes *r =
-		bch2_sb_field_get(c->disk_sb.sb, recovery_passes);
-
-	if (stable < recovery_passes_nr_entries(r)) {
-		struct recovery_pass_entry *i = r->start + stable;
-
-		/*
-		 * Ratelimit if the last runtime was more than 1% of the time
-		 * since we last ran
-		 */
-		ret = (u64) le32_to_cpu(i->last_runtime) * 100 >
-			ktime_get_real_seconds() - le64_to_cpu(i->last_run);
-
-		if (BCH_RECOVERY_PASS_NO_RATELIMIT(i))
-			ret = false;
-	}
-
-	return ret;
-}
-
-const struct bch_sb_field_ops bch_sb_field_ops_recovery_passes = {
-	.validate	= bch2_sb_recovery_passes_validate,
-	.to_text	= bch2_sb_recovery_passes_to_text
-};
-
-/* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */
-static int bch2_recovery_pass_empty(struct bch_fs *c)
-{
-	return 0;
-}
-
-static int bch2_set_may_go_rw(struct bch_fs *c)
-{
-	struct journal_keys *keys = &c->journal_keys;
-
-	/*
-	 * After we go RW, the journal keys buffer can't be modified (except for
-	 * setting journal_key->overwritten: it will be accessed by multiple
-	 * threads)
-	 */
-	move_gap(keys, keys->nr);
-
-	set_bit(BCH_FS_may_go_rw, &c->flags);
-
-	if (go_rw_in_recovery(c)) {
-		if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) {
-			bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate");
-			bch2_reconstruct_alloc(c);
-		}
-
-		return bch2_fs_read_write_early(c);
-	}
-	return 0;
-}
-
-/*
- * Make sure root inode is readable while we're still in recovery and can rewind
- * for repair:
- */
-static int bch2_lookup_root_inode(struct bch_fs *c)
-{
-	subvol_inum inum = BCACHEFS_ROOT_SUBVOL_INUM;
-	struct bch_inode_unpacked inode_u;
-	struct bch_subvolume subvol;
-
-	return bch2_trans_do(c,
-		bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
-		bch2_inode_find_by_inum_trans(trans, inum, &inode_u));
-}
-
-struct recovery_pass_fn {
-	int		(*fn)(struct bch_fs *);
-	unsigned	when;
-};
-
-static struct recovery_pass_fn recovery_pass_fns[] = {
-#define x(_fn, _id, _when)	{ .fn = bch2_##_fn, .when = _when },
-	BCH_RECOVERY_PASSES()
-#undef x
-};
-
-static u64 bch2_recovery_passes_match(unsigned flags)
-{
-	u64 ret = 0;
-
-	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
-		if (recovery_pass_fns[i].when & flags)
-			ret |= BIT_ULL(i);
-	return ret;
-}
-
-u64 bch2_fsck_recovery_passes(void)
-{
-	return bch2_recovery_passes_match(PASS_FSCK);
-}
-
-static void bch2_run_async_recovery_passes(struct bch_fs *c)
-{
-	if (down_trylock(&c->recovery.run_lock))
-		return;
-
-	if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_async_recovery_passes))
-		goto unlock;
-
-	if (queue_work(system_long_wq, &c->recovery.work))
-		return;
-
-	enumerated_ref_put(&c->writes, BCH_WRITE_REF_async_recovery_passes);
-unlock:
-	up(&c->recovery.run_lock);
-}
-
-static bool recovery_pass_needs_set(struct bch_fs *c,
-				    enum bch_recovery_pass pass,
-				    enum bch_run_recovery_pass_flags *flags)
-{
-	struct bch_fs_recovery *r = &c->recovery;
-
-	/*
-	 * Never run scan_for_btree_nodes persistently: check_topology will run
-	 * it if required
-	 */
-	if (pass == BCH_RECOVERY_PASS_scan_for_btree_nodes)
-		*flags |= RUN_RECOVERY_PASS_nopersistent;
-
-	if ((*flags & RUN_RECOVERY_PASS_ratelimit) &&
-	    !bch2_recovery_pass_want_ratelimit(c, pass))
-		*flags &= ~RUN_RECOVERY_PASS_ratelimit;
-
-	/*
-	 * If RUN_RECOVERY_PASS_nopersistent is set, we don't want to do
-	 * anything if the pass has already run: these mean we need a prior pass
-	 * to run before we continue to repair, we don't expect that pass to fix
-	 * the damage we encountered.
-	 *
-	 * Otherwise, we run run_explicit_recovery_pass when we find damage, so
-	 * it should run again even if it's already run:
-	 */
-	bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
-	bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent);
-	bool rewind = in_recovery &&
-		r->curr_pass > pass &&
-		!(r->passes_complete & BIT_ULL(pass));
-
-	if (persistent
-	    ? !(c->sb.recovery_passes_required & BIT_ULL(pass))
-	    : !((r->passes_to_run|r->passes_complete) & BIT_ULL(pass)))
-		return true;
-
-	if (!(*flags & RUN_RECOVERY_PASS_ratelimit) &&
-	    (r->passes_ratelimiting & BIT_ULL(pass)))
-		return true;
-
-	if (rewind)
-		return true;
-
-	return false;
-}
-
-/*
- * For when we need to rewind recovery passes and run a pass we skipped:
- */
-int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
-				      struct printbuf *out,
-				      enum bch_recovery_pass pass,
-				      enum bch_run_recovery_pass_flags flags)
-{
-	struct bch_fs_recovery *r = &c->recovery;
-	int ret = 0;
-
-	lockdep_assert_held(&c->sb_lock);
-
-	bch2_printbuf_make_room(out, 1024);
-	out->atomic++;
-
-	unsigned long lockflags;
-	spin_lock_irqsave(&r->lock, lockflags);
-
-	if (!recovery_pass_needs_set(c, pass, &flags))
-		goto out;
-
-	bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
-	bool rewind = in_recovery &&
-		r->curr_pass > pass &&
-		!(r->passes_complete & BIT_ULL(pass));
-	bool ratelimit = flags & RUN_RECOVERY_PASS_ratelimit;
-
-	if (!(flags & RUN_RECOVERY_PASS_nopersistent)) {
-		struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
-		__set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);
-	}
-
-	if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
-	    (!in_recovery || r->curr_pass >= BCH_RECOVERY_PASS_set_may_go_rw)) {
-		prt_printf(out, "need recovery pass %s (%u), but already rw\n",
-			   bch2_recovery_passes[pass], pass);
-		ret = bch_err_throw(c, cannot_rewind_recovery);
-		goto out;
-	}
-
-	if (ratelimit)
-		r->passes_ratelimiting |= BIT_ULL(pass);
-	else
-		r->passes_ratelimiting &= ~BIT_ULL(pass);
-
-	if (in_recovery && !ratelimit) {
-		prt_printf(out, "running recovery pass %s (%u), currently at %s (%u)%s\n",
-			   bch2_recovery_passes[pass], pass,
			   bch2_recovery_passes[r->curr_pass], r->curr_pass,
-			   rewind ? " - rewinding" : "");
-
-		r->passes_to_run |= BIT_ULL(pass);
-
-		if (rewind) {
-			r->next_pass = pass;
-			r->passes_complete &= (1ULL << pass) - 1;
-			ret = bch_err_throw(c, restart_recovery);
-		}
-	} else {
-		prt_printf(out, "scheduling recovery pass %s (%u)%s\n",
-			   bch2_recovery_passes[pass], pass,
-			   ratelimit ? " - ratelimiting" : "");
-
-		struct recovery_pass_fn *p = recovery_pass_fns + pass;
-		if (p->when & PASS_ONLINE)
-			bch2_run_async_recovery_passes(c);
-	}
-out:
-	spin_unlock_irqrestore(&r->lock, lockflags);
-	--out->atomic;
-	return ret;
-}
-
-int bch2_run_explicit_recovery_pass(struct bch_fs *c,
-				    struct printbuf *out,
-				    enum bch_recovery_pass pass,
-				    enum bch_run_recovery_pass_flags flags)
-{
-	int ret = 0;
-
-	if (recovery_pass_needs_set(c, pass, &flags)) {
-		guard(mutex)(&c->sb_lock);
-		ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags);
-		bch2_write_super(c);
-	}
-
-	return ret;
-}
-
-/*
- * Returns 0 if @pass has run recently, otherwise one of
- * -BCH_ERR_restart_recovery
- * -BCH_ERR_recovery_pass_will_run
- */
-int bch2_require_recovery_pass(struct bch_fs *c,
-			       struct printbuf *out,
-			       enum bch_recovery_pass pass)
-{
-	if (test_bit(BCH_FS_in_recovery, &c->flags) &&
-	    c->recovery.passes_complete & BIT_ULL(pass))
-		return 0;
-
-	guard(mutex)(&c->sb_lock);
-
-	if (bch2_recovery_pass_want_ratelimit(c, pass))
-		return 0;
-
-	enum bch_run_recovery_pass_flags flags = 0;
-	int ret = 0;
-
-	if (recovery_pass_needs_set(c, pass, &flags)) {
-		ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags);
-		bch2_write_super(c);
-	}
-
-	return ret ?: bch_err_throw(c, recovery_pass_will_run);
-}
-
-int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
-{
-	enum bch_run_recovery_pass_flags flags = 0;
-
-	if (!recovery_pass_needs_set(c, pass, &flags))
-		return 0;
-
-	struct printbuf buf = PRINTBUF;
-	bch2_log_msg_start(c, &buf);
-
-	mutex_lock(&c->sb_lock);
-	int ret = __bch2_run_explicit_recovery_pass(c, &buf, pass,
-						RUN_RECOVERY_PASS_nopersistent);
-	mutex_unlock(&c->sb_lock);
-
-	bch2_print_str(c, KERN_NOTICE, buf.buf);
-	printbuf_exit(&buf);
-	return ret;
-}
-
-static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
-{
-	struct bch_fs_recovery *r = &c->recovery;
-	struct recovery_pass_fn *p = recovery_pass_fns + pass;
-
-	if (!(p->when & PASS_SILENT))
-		bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
-			   bch2_recovery_passes[pass]);
-
-	s64 start_time = ktime_get_real_seconds();
-	int ret = p->fn(c);
-
-	r->passes_to_run &= ~BIT_ULL(pass);
-
-	if (ret) {
-		r->passes_failing |= BIT_ULL(pass);
-		return ret;
-	}
-
-	r->passes_failing = 0;
-
-	if (!test_bit(BCH_FS_error, &c->flags))
-		bch2_sb_recovery_pass_complete(c, pass, start_time);
-
-	if (!(p->when & PASS_SILENT))
-		bch2_print(c, KERN_CONT " done\n");
-
-	return 0;
-}
-
-static int __bch2_run_recovery_passes(struct bch_fs *c, u64 orig_passes_to_run,
-				      bool online)
-{
-	struct bch_fs_recovery *r = &c->recovery;
-	int ret = 0;
-
-	spin_lock_irq(&r->lock);
-
-	if (online)
-		orig_passes_to_run &= bch2_recovery_passes_match(PASS_ONLINE);
-
-	if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))
-		orig_passes_to_run &= ~bch2_recovery_passes_match(PASS_ALLOC);
-
-	/*
-	 * A failed recovery pass will be retried after another pass succeeds -
-	 * but not this iteration.
-	 *
-	 * This is because some passes depend on repair done by other passes: we
-	 * may want to retry, but we don't want to loop on failing passes.
-	 */
-
-	orig_passes_to_run &= ~r->passes_failing;
-
-	r->passes_to_run = orig_passes_to_run;
-
-	while (r->passes_to_run) {
-		unsigned prev_done = r->pass_done;
-		unsigned pass = __ffs64(r->passes_to_run);
-		r->curr_pass = pass;
-		r->next_pass = r->curr_pass + 1;
-		r->passes_to_run &= ~BIT_ULL(pass);
-
-		spin_unlock_irq(&r->lock);
-
-		int ret2 = bch2_run_recovery_pass(c, pass) ?:
-			bch2_journal_flush(&c->journal);
-
-		spin_lock_irq(&r->lock);
-
-		if (r->next_pass < r->curr_pass) {
-			/* Rewind: */
-			r->passes_to_run |= orig_passes_to_run & (~0ULL << r->next_pass);
-		} else if (!ret2) {
-			r->pass_done = max(r->pass_done, pass);
-			r->passes_complete |= BIT_ULL(pass);
-		} else {
-			ret = ret2;
-		}
-
-		if (ret && !online)
-			break;
-
-		if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
-		    r->pass_done > BCH_RECOVERY_PASS_check_snapshots) {
-			bch2_copygc_wakeup(c);
-			bch2_rebalance_wakeup(c);
-		}
-	}
-
-	clear_bit(BCH_FS_in_recovery, &c->flags);
-	spin_unlock_irq(&r->lock);
-
-	return ret;
-}
-
-static void bch2_async_recovery_passes_work(struct work_struct *work)
-{
-	struct bch_fs *c = container_of(work, struct bch_fs, recovery.work);
-	struct bch_fs_recovery *r = &c->recovery;
-
-	__bch2_run_recovery_passes(c,
-		c->sb.recovery_passes_required & ~r->passes_ratelimiting,
-		true);
-
-	up(&r->run_lock);
-	enumerated_ref_put(&c->writes, BCH_WRITE_REF_async_recovery_passes);
-}
-
-int bch2_run_online_recovery_passes(struct bch_fs *c, u64 passes)
-{
-	return __bch2_run_recovery_passes(c, c->sb.recovery_passes_required|passes, true);
-}
-
-int bch2_run_recovery_passes(struct bch_fs *c, enum bch_recovery_pass from)
-{
-	u64 passes =
-		bch2_recovery_passes_match(PASS_ALWAYS) |
-		(!c->sb.clean ? bch2_recovery_passes_match(PASS_UNCLEAN) : 0) |
-		(c->opts.fsck ? bch2_recovery_passes_match(PASS_FSCK) : 0) |
-		c->opts.recovery_passes |
-		c->sb.recovery_passes_required;
-
-	if (c->opts.recovery_pass_last)
-		passes &= BIT_ULL(c->opts.recovery_pass_last + 1) - 1;
-
-	/*
-	 * We can't allow set_may_go_rw to be excluded; that would cause us to
-	 * use the journal replay keys for updates where it's not expected.
-	 */
-	c->opts.recovery_passes_exclude &= ~BIT_ULL(BCH_RECOVERY_PASS_set_may_go_rw);
-	passes &= ~c->opts.recovery_passes_exclude;
-
-	passes &= ~(BIT_ULL(from) - 1);
-
-	down(&c->recovery.run_lock);
-	int ret = __bch2_run_recovery_passes(c, passes, false);
-	up(&c->recovery.run_lock);
-
-	return ret;
-}
-
-static void prt_passes(struct printbuf *out, const char *msg, u64 passes)
-{
-	prt_printf(out, "%s:\t", msg);
-	prt_bitflags(out, bch2_recovery_passes, passes);
-	prt_newline(out);
-}
-
-void bch2_recovery_pass_status_to_text(struct printbuf *out, struct bch_fs *c)
-{
-	struct bch_fs_recovery *r = &c->recovery;
-
-	printbuf_tabstop_push(out, 32);
-	prt_passes(out, "Scheduled passes", c->sb.recovery_passes_required);
-	prt_passes(out, "Scheduled online passes", c->sb.recovery_passes_required &
-		   bch2_recovery_passes_match(PASS_ONLINE));
-	prt_passes(out, "Complete passes", r->passes_complete);
-	prt_passes(out, "Failing passes", r->passes_failing);
-
-	if (r->curr_pass) {
-		prt_printf(out, "Current pass:\t%s\n", bch2_recovery_passes[r->curr_pass]);
-		prt_passes(out, "Current passes", r->passes_to_run);
-	}
-}
-
-void bch2_fs_recovery_passes_init(struct bch_fs *c)
-{
-	spin_lock_init(&c->recovery.lock);
-	sema_init(&c->recovery.run_lock, 1);
-
-	INIT_WORK(&c->recovery.work, bch2_async_recovery_passes_work);
-}
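A note on the passes_to_stable_map/passes_from_stable_map tables in the deleted file: the in-memory pass enum can be reordered between releases, so the superblock records passes under a separate stable enum, and bitmasks are translated through lookup tables in both directions. A minimal standalone sketch of that translation, using invented pass names and table contents rather than the real bcachefs tables:

	/* Standalone sketch (not bcachefs code) of stable-numbering translation */
	#include <stdint.h>
	#include <stdio.h>

	/* hypothetical passes: runtime order differs from on-disk (stable) order */
	enum { PASS_alloc, PASS_snapshots, PASS_fsck, PASS_NR };
	static const uint8_t to_stable[PASS_NR]   = { 2, 0, 1 };
	static const uint8_t from_stable[PASS_NR] = { 1, 2, 0 };

	/* remap a bitmask through a lookup table, one bit at a time */
	static uint64_t remap(uint64_t v, const uint8_t *map, unsigned nr)
	{
		uint64_t ret = 0;
		for (unsigned i = 0; i < nr; i++)
			if (v & (1ULL << i))
				ret |= 1ULL << map[i];
		return ret;
	}

	int main(void)
	{
		uint64_t runtime_mask = 1ULL << PASS_fsck;
		uint64_t stable = remap(runtime_mask, to_stable, PASS_NR);

		/* round-tripping through both tables must be the identity */
		printf("stable=%llx back=%llx\n",
		       (unsigned long long)stable,
		       (unsigned long long)remap(stable, from_stable, PASS_NR));
		return 0;
	}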
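The ratelimit heuristic in bch2_recovery_pass_want_ratelimit() is compact enough to miss on a first read: a pass is throttled when its last runtime exceeded 1% of the time elapsed since it last ran, i.e. runtime * 100 > now - last_run. A self-contained sketch of just that comparison, with invented numbers:

	/* Sketch of the 1% ratelimit check; values are illustrative only */
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	static bool want_ratelimit(uint32_t last_runtime, int64_t now, int64_t last_run)
	{
		/* runtime > 1% of elapsed  <=>  runtime * 100 > elapsed */
		return (uint64_t)last_runtime * 100 > (uint64_t)(now - last_run);
	}

	int main(void)
	{
		/* a 60s pass that last ran 30 minutes ago: 60 > 18, so ratelimited */
		printf("%d\n", want_ratelimit(60, 10000, 10000 - 1800));
		/* same pass, last ran 2 hours ago: 60 < 72, so allowed to run */
		printf("%d\n", want_ratelimit(60, 10000, 10000 - 7200));
		return 0;
	}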
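Finally, the rewind path relies on two bitmask idioms: (1ULL << pass) - 1 keeps only the completion bits below the rewind target (forgetting the target pass and everything after it), while ~0ULL << next_pass requeues the scheduled passes from that point onward. A standalone illustration with invented mask values:

	/* Sketch of the rewind bookkeeping masks; not bcachefs code */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t scheduled = 0xff;	/* passes 0-7 were scheduled */
		uint64_t complete  = 0x3f;	/* passes 0-5 have completed */
		unsigned rewind_to = 3;

		/* forget completion of pass 3 and everything after it */
		complete &= (1ULL << rewind_to) - 1;

		/* requeue every scheduled pass from the rewind point onward */
		uint64_t to_run = scheduled & (~0ULL << rewind_to);

		/* prints: complete=7 to_run=f8 */
		printf("complete=%llx to_run=%llx\n",
		       (unsigned long long)complete,
		       (unsigned long long)to_run);
		return 0;
	}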