summaryrefslogtreecommitdiff
path: root/fs/bcachefs/btree_node_scan.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/btree_node_scan.c')
-rw-r--r--fs/bcachefs/btree_node_scan.c611
1 files changed, 0 insertions, 611 deletions
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
deleted file mode 100644
index a3fb07c60e25..000000000000
--- a/fs/bcachefs/btree_node_scan.c
+++ /dev/null
@@ -1,611 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include "bcachefs.h"
-#include "btree_cache.h"
-#include "btree_io.h"
-#include "btree_journal_iter.h"
-#include "btree_node_scan.h"
-#include "btree_update_interior.h"
-#include "buckets.h"
-#include "error.h"
-#include "journal_io.h"
-#include "recovery_passes.h"
-
-#include <linux/kthread.h>
-#include <linux/min_heap.h>
-#include <linux/sched/sysctl.h>
-#include <linux/sort.h>
-
-struct find_btree_nodes_worker {
- struct closure *cl;
- struct find_btree_nodes *f;
- struct bch_dev *ca;
-};
-
-static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n)
-{
- bch2_btree_id_level_to_text(out, n->btree_id, n->level);
- prt_printf(out, " seq=%u journal_seq=%llu cookie=%llx ",
- n->seq, n->journal_seq, n->cookie);
- bch2_bpos_to_text(out, n->min_key);
- prt_str(out, "-");
- bch2_bpos_to_text(out, n->max_key);
-
- if (n->range_updated)
- prt_str(out, " range updated");
-
- for (unsigned i = 0; i < n->nr_ptrs; i++) {
- prt_char(out, ' ');
- bch2_extent_ptr_to_text(out, c, n->ptrs + i);
- }
-}
-
-static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes)
-{
- printbuf_indent_add(out, 2);
- darray_for_each(nodes, i) {
- found_btree_node_to_text(out, c, i);
- prt_newline(out);
- }
- printbuf_indent_sub(out, 2);
-}
-
-static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f)
-{
- struct bkey_i_btree_ptr_v2 *bp = bkey_btree_ptr_v2_init(k);
-
- set_bkey_val_u64s(&bp->k, sizeof(struct bch_btree_ptr_v2) / sizeof(u64) + f->nr_ptrs);
- bp->k.p = f->max_key;
- bp->v.seq = cpu_to_le64(f->cookie);
- bp->v.sectors_written = 0;
- bp->v.flags = 0;
- bp->v.sectors_written = cpu_to_le16(f->sectors_written);
- bp->v.min_key = f->min_key;
- SET_BTREE_PTR_RANGE_UPDATED(&bp->v, f->range_updated);
- memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs);
-}
-
-static inline u64 bkey_journal_seq(struct bkey_s_c k)
-{
- switch (k.k->type) {
- case KEY_TYPE_inode_v3:
- return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_journal_seq);
- default:
- return 0;
- }
-}
-
-static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
-{
- const struct found_btree_node *l = _l;
- const struct found_btree_node *r = _r;
-
- return cmp_int(l->btree_id, r->btree_id) ?:
- cmp_int(l->level, r->level) ?:
- cmp_int(l->cookie, r->cookie);
-}
-
-/*
- * Given two found btree nodes, if their sequence numbers are equal, take the
- * one that's readable:
- */
-static int found_btree_node_cmp_time(const struct found_btree_node *l,
- const struct found_btree_node *r)
-{
- return cmp_int(l->seq, r->seq) ?:
- cmp_int(l->journal_seq, r->journal_seq);
-}
-
-static int found_btree_node_cmp_pos(const void *_l, const void *_r)
-{
- const struct found_btree_node *l = _l;
- const struct found_btree_node *r = _r;
-
- return cmp_int(l->btree_id, r->btree_id) ?:
- -cmp_int(l->level, r->level) ?:
- bpos_cmp(l->min_key, r->min_key) ?:
- -found_btree_node_cmp_time(l, r);
-}
-
-static inline bool found_btree_node_cmp_pos_less(const void *l, const void *r, void *arg)
-{
- return found_btree_node_cmp_pos(l, r) < 0;
-}
-
-static inline void found_btree_node_swap(void *_l, void *_r, void *arg)
-{
- struct found_btree_node *l = _l;
- struct found_btree_node *r = _r;
-
- swap(*l, *r);
-}
-
-static const struct min_heap_callbacks found_btree_node_heap_cbs = {
- .less = found_btree_node_cmp_pos_less,
- .swp = found_btree_node_swap,
-};
-
-static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
- struct btree *b, struct bio *bio, u64 offset)
-{
- struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
- struct btree_node *bn = b->data;
-
- bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
- bio->bi_iter.bi_sector = offset;
- bch2_bio_map(bio, b->data, c->opts.block_size);
-
- u64 submit_time = local_clock();
- submit_bio_wait(bio);
- bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);
-
- if (bio->bi_status) {
- bch_err_dev_ratelimited(ca,
- "IO error in try_read_btree_node() at %llu: %s",
- offset, bch2_blk_status_to_str(bio->bi_status));
- return;
- }
-
- if (le64_to_cpu(bn->magic) != bset_magic(c))
- return;
-
- if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
- if (!c->chacha20_key_set)
- return;
-
- struct nonce nonce = btree_nonce(&bn->keys, 0);
- unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
-
- bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes);
- }
-
- if (btree_id_is_alloc(BTREE_NODE_ID(bn)))
- return;
-
- if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH)
- return;
-
- if (BTREE_NODE_ID(bn) >= BTREE_ID_NR_MAX)
- return;
-
- rcu_read_lock();
- struct found_btree_node n = {
- .btree_id = BTREE_NODE_ID(bn),
- .level = BTREE_NODE_LEVEL(bn),
- .seq = BTREE_NODE_SEQ(bn),
- .cookie = le64_to_cpu(bn->keys.seq),
- .min_key = bn->min_key,
- .max_key = bn->max_key,
- .nr_ptrs = 1,
- .ptrs[0].type = 1 << BCH_EXTENT_ENTRY_ptr,
- .ptrs[0].offset = offset,
- .ptrs[0].dev = ca->dev_idx,
- .ptrs[0].gen = bucket_gen_get(ca, sector_to_bucket(ca, offset)),
- };
- rcu_read_unlock();
-
- bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
- bio->bi_iter.bi_sector = offset;
- bch2_bio_map(bio, b->data, c->opts.btree_node_size);
-
- submit_time = local_clock();
- submit_bio_wait(bio);
- bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);
-
- found_btree_node_to_key(&b->key, &n);
-
- CLASS(printbuf, buf)();
- if (!bch2_btree_node_read_done(c, ca, b, NULL, &buf)) {
- /* read_done will swap out b->data for another buffer */
- bn = b->data;
- /*
- * Grab journal_seq here because we want the max journal_seq of
- * any bset; read_done sorts down to a single set and picks the
- * max journal_seq
- */
- n.journal_seq = le64_to_cpu(bn->keys.journal_seq),
- n.sectors_written = b->written;
-
- mutex_lock(&f->lock);
- if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
- bch_err(c, "try_read_btree_node() can't handle endian conversion");
- f->ret = -EINVAL;
- goto unlock;
- }
-
- if (darray_push(&f->nodes, n))
- f->ret = -ENOMEM;
-unlock:
- mutex_unlock(&f->lock);
- }
-}
-
-static int read_btree_nodes_worker(void *p)
-{
- struct find_btree_nodes_worker *w = p;
- struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
- struct bch_dev *ca = w->ca;
- unsigned long last_print = jiffies;
- struct btree *b = NULL;
- struct bio *bio = NULL;
-
- b = __bch2_btree_node_mem_alloc(c);
- if (!b) {
- bch_err(c, "read_btree_nodes_worker: error allocating buf");
- w->f->ret = -ENOMEM;
- goto err;
- }
-
- bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL);
- if (!bio) {
- bch_err(c, "read_btree_nodes_worker: error allocating bio");
- w->f->ret = -ENOMEM;
- goto err;
- }
-
- for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++)
- for (unsigned bucket_offset = 0;
- bucket_offset + btree_sectors(c) <= ca->mi.bucket_size;
- bucket_offset += btree_sectors(c)) {
- if (time_after(jiffies, last_print + HZ * 30)) {
- u64 cur_sector = bucket * ca->mi.bucket_size + bucket_offset;
- u64 end_sector = ca->mi.nbuckets * ca->mi.bucket_size;
-
- bch_info(ca, "%s: %2u%% done", __func__,
- (unsigned) div64_u64(cur_sector * 100, end_sector));
- last_print = jiffies;
- }
-
- u64 sector = bucket * ca->mi.bucket_size + bucket_offset;
-
- if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap &&
- !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
- continue;
-
- try_read_btree_node(w->f, ca, b, bio, sector);
- }
-err:
- if (b)
- __btree_node_data_free(b);
- kfree(b);
- bio_put(bio);
- enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
- closure_put(w->cl);
- kfree(w);
- return 0;
-}
-
-static int read_btree_nodes(struct find_btree_nodes *f)
-{
- struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
- struct closure cl;
- int ret = 0;
-
- closure_init_stack(&cl);
-
- for_each_online_member(c, ca, BCH_DEV_READ_REF_btree_node_scan) {
- if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree)))
- continue;
-
- struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
- if (!w) {
- enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
- ret = -ENOMEM;
- goto err;
- }
-
- w->cl = &cl;
- w->f = f;
- w->ca = ca;
-
- struct task_struct *t = kthread_create(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
- ret = PTR_ERR_OR_ZERO(t);
- if (ret) {
- enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
- kfree(w);
- bch_err_msg(c, ret, "starting kthread");
- break;
- }
-
- closure_get(&cl);
- enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
- wake_up_process(t);
- }
-err:
- while (closure_sync_timeout(&cl, sysctl_hung_task_timeout_secs * HZ / 2))
- ;
- return f->ret ?: ret;
-}
-
-static bool nodes_overlap(const struct found_btree_node *l,
- const struct found_btree_node *r)
-{
- return (l->btree_id == r->btree_id &&
- l->level == r->level &&
- bpos_gt(l->max_key, r->min_key));
-}
-
-static int handle_overwrites(struct bch_fs *c,
- struct found_btree_node *l,
- found_btree_nodes *nodes_heap)
-{
- struct found_btree_node *r;
-
- while ((r = min_heap_peek(nodes_heap)) &&
- nodes_overlap(l, r)) {
- int cmp = found_btree_node_cmp_time(l, r);
-
- if (cmp > 0) {
- if (bpos_cmp(l->max_key, r->max_key) >= 0)
- min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
- else {
- r->range_updated = true;
- r->min_key = bpos_successor(l->max_key);
- r->range_updated = true;
- min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL);
- }
- } else if (cmp < 0) {
- BUG_ON(bpos_eq(l->min_key, r->min_key));
-
- l->max_key = bpos_predecessor(r->min_key);
- l->range_updated = true;
- } else if (r->level) {
- min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
- } else {
- if (bpos_cmp(l->max_key, r->max_key) >= 0)
- min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
- else {
- r->range_updated = true;
- r->min_key = bpos_successor(l->max_key);
- r->range_updated = true;
- min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL);
- }
- }
-
- cond_resched();
- }
-
- return 0;
-}
-
-int bch2_scan_for_btree_nodes(struct bch_fs *c)
-{
- struct find_btree_nodes *f = &c->found_btree_nodes;
- struct printbuf buf = PRINTBUF;
- found_btree_nodes nodes_heap = {};
- size_t dst;
- int ret = 0;
-
- if (f->nodes.nr)
- return 0;
-
- mutex_init(&f->lock);
-
- ret = read_btree_nodes(f);
- if (ret)
- return ret;
-
- if (!f->nodes.nr) {
- bch_err(c, "%s: no btree nodes found", __func__);
- ret = -EINVAL;
- goto err;
- }
-
- if (0 && c->opts.verbose) {
- printbuf_reset(&buf);
- prt_printf(&buf, "%s: nodes found:\n", __func__);
- found_btree_nodes_to_text(&buf, c, f->nodes);
- bch2_print_str(c, KERN_INFO, buf.buf);
- }
-
- sort_nonatomic(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL);
-
- dst = 0;
- darray_for_each(f->nodes, i) {
- struct found_btree_node *prev = dst ? f->nodes.data + dst - 1 : NULL;
-
- if (prev &&
- prev->cookie == i->cookie) {
- if (prev->nr_ptrs == ARRAY_SIZE(prev->ptrs)) {
- bch_err(c, "%s: found too many replicas for btree node", __func__);
- ret = -EINVAL;
- goto err;
- }
- prev->ptrs[prev->nr_ptrs++] = i->ptrs[0];
- } else {
- f->nodes.data[dst++] = *i;
- }
- }
- f->nodes.nr = dst;
-
- sort_nonatomic(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
-
- if (0 && c->opts.verbose) {
- printbuf_reset(&buf);
- prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__);
- found_btree_nodes_to_text(&buf, c, f->nodes);
- bch2_print_str(c, KERN_INFO, buf.buf);
- }
-
- swap(nodes_heap, f->nodes);
-
- {
- /* darray must have same layout as a heap */
- min_heap_char real_heap;
- BUILD_BUG_ON(sizeof(nodes_heap.nr) != sizeof(real_heap.nr));
- BUILD_BUG_ON(sizeof(nodes_heap.size) != sizeof(real_heap.size));
- BUILD_BUG_ON(offsetof(found_btree_nodes, nr) != offsetof(min_heap_char, nr));
- BUILD_BUG_ON(offsetof(found_btree_nodes, size) != offsetof(min_heap_char, size));
- }
-
- min_heapify_all(&nodes_heap, &found_btree_node_heap_cbs, NULL);
-
- if (nodes_heap.nr) {
- ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap));
- if (ret)
- goto err;
-
- min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL);
- }
-
- while (true) {
- ret = handle_overwrites(c, &darray_last(f->nodes), &nodes_heap);
- if (ret)
- goto err;
-
- if (!nodes_heap.nr)
- break;
-
- ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap));
- if (ret)
- goto err;
-
- min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL);
- }
-
- for (struct found_btree_node *n = f->nodes.data; n < &darray_last(f->nodes); n++)
- BUG_ON(nodes_overlap(n, n + 1));
-
- if (0 && c->opts.verbose) {
- printbuf_reset(&buf);
- prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
- found_btree_nodes_to_text(&buf, c, f->nodes);
- bch2_print_str(c, KERN_INFO, buf.buf);
- } else {
- bch_info(c, "btree node scan found %zu nodes after overwrites", f->nodes.nr);
- }
-
- eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
-err:
- darray_exit(&nodes_heap);
- printbuf_exit(&buf);
- return ret;
-}
-
-static int found_btree_node_range_start_cmp(const void *_l, const void *_r)
-{
- const struct found_btree_node *l = _l;
- const struct found_btree_node *r = _r;
-
- return cmp_int(l->btree_id, r->btree_id) ?:
- -cmp_int(l->level, r->level) ?:
- bpos_cmp(l->max_key, r->min_key);
-}
-
-#define for_each_found_btree_node_in_range(_f, _search, _idx) \
- for (size_t _idx = eytzinger0_find_gt((_f)->nodes.data, (_f)->nodes.nr, \
- sizeof((_f)->nodes.data[0]), \
- found_btree_node_range_start_cmp, &search); \
- _idx < (_f)->nodes.nr && \
- (_f)->nodes.data[_idx].btree_id == _search.btree_id && \
- (_f)->nodes.data[_idx].level == _search.level && \
- bpos_lt((_f)->nodes.data[_idx].min_key, _search.max_key); \
- _idx = eytzinger0_next(_idx, (_f)->nodes.nr))
-
-bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b)
-{
- struct find_btree_nodes *f = &c->found_btree_nodes;
-
- struct found_btree_node search = {
- .btree_id = b->c.btree_id,
- .level = b->c.level,
- .min_key = b->data->min_key,
- .max_key = b->key.k.p,
- };
-
- for_each_found_btree_node_in_range(f, search, idx)
- if (f->nodes.data[idx].seq > BTREE_NODE_SEQ(b->data))
- return true;
- return false;
-}
-
-int bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
-{
- int ret = bch2_run_print_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
- if (ret)
- return ret;
-
- struct found_btree_node search = {
- .btree_id = btree,
- .level = 0,
- .min_key = POS_MIN,
- .max_key = SPOS_MAX,
- };
-
- for_each_found_btree_node_in_range(&c->found_btree_nodes, search, idx)
- return true;
- return false;
-}
-
-int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
- unsigned level, struct bpos node_min, struct bpos node_max)
-{
- if (btree_id_is_alloc(btree))
- return 0;
-
- struct find_btree_nodes *f = &c->found_btree_nodes;
-
- int ret = bch2_run_print_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
- if (ret)
- return ret;
-
- if (c->opts.verbose) {
- struct printbuf buf = PRINTBUF;
-
- prt_str(&buf, "recovery ");
- bch2_btree_id_level_to_text(&buf, btree, level);
- prt_str(&buf, " ");
- bch2_bpos_to_text(&buf, node_min);
- prt_str(&buf, " - ");
- bch2_bpos_to_text(&buf, node_max);
-
- bch_info(c, "%s(): %s", __func__, buf.buf);
- printbuf_exit(&buf);
- }
-
- struct found_btree_node search = {
- .btree_id = btree,
- .level = level,
- .min_key = node_min,
- .max_key = node_max,
- };
-
- for_each_found_btree_node_in_range(f, search, idx) {
- struct found_btree_node n = f->nodes.data[idx];
-
- n.range_updated |= bpos_lt(n.min_key, node_min);
- n.min_key = bpos_max(n.min_key, node_min);
-
- n.range_updated |= bpos_gt(n.max_key, node_max);
- n.max_key = bpos_min(n.max_key, node_max);
-
- struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;
-
- found_btree_node_to_key(&tmp.k, &n);
-
- if (c->opts.verbose) {
- struct printbuf buf = PRINTBUF;
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
- bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
- printbuf_exit(&buf);
- }
-
- BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k),
- (struct bkey_validate_context) {
- .from = BKEY_VALIDATE_btree_node,
- .level = level + 1,
- .btree = btree,
- }));
-
- ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-void bch2_find_btree_nodes_exit(struct find_btree_nodes *f)
-{
- darray_exit(&f->nodes);
-}