summaryrefslogtreecommitdiff
path: root/fs/btrfs/bio.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/bio.c')
-rw-r--r--fs/btrfs/bio.c290
1 files changed, 202 insertions, 88 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 21df48e6c4fa..fa1d321a2fb8 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -41,13 +41,17 @@ static bool bbio_has_ordered_extent(const struct btrfs_bio *bbio)
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
* is already initialized by the block layer.
*/
-void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
+void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode, u64 file_offset,
btrfs_bio_end_io_t end_io, void *private)
{
+ /* @inode parameter is mandatory. */
+ ASSERT(inode);
+
memset(bbio, 0, offsetof(struct btrfs_bio, bio));
- bbio->fs_info = fs_info;
+ bbio->inode = inode;
bbio->end_io = end_io;
bbio->private = private;
+ bbio->file_offset = file_offset;
atomic_set(&bbio->pending_ios, 1);
WRITE_ONCE(bbio->status, BLK_STS_OK);
}
@@ -60,7 +64,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
* a mempool.
*/
struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
- struct btrfs_fs_info *fs_info,
+ struct btrfs_inode *inode, u64 file_offset,
btrfs_bio_end_io_t end_io, void *private)
{
struct btrfs_bio *bbio;
@@ -68,7 +72,7 @@ struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
bbio = btrfs_bio(bio);
- btrfs_bio_init(bbio, fs_info, end_io, private);
+ btrfs_bio_init(bbio, inode, file_offset, end_io, private);
return bbio;
}
@@ -85,13 +89,13 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
return ERR_CAST(bio);
bbio = btrfs_bio(bio);
- btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
- bbio->inode = orig_bbio->inode;
- bbio->file_offset = orig_bbio->file_offset;
+ btrfs_bio_init(bbio, orig_bbio->inode, orig_bbio->file_offset, NULL, orig_bbio);
orig_bbio->file_offset += map_length;
if (bbio_has_ordered_extent(bbio)) {
refcount_inc(&orig_bbio->ordered->refs);
bbio->ordered = orig_bbio->ordered;
+ bbio->orig_logical = orig_bbio->orig_logical;
+ orig_bbio->orig_logical += map_length;
}
bbio->csum_search_commit_root = orig_bbio->csum_search_commit_root;
atomic_inc(&orig_bbio->pending_ios);
@@ -100,6 +104,12 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
+ /* Make sure we're already in task context. */
+ ASSERT(in_task());
+
+ if (bbio->async_csum)
+ wait_for_completion(&bbio->csum_done);
+
bbio->bio.bi_status = status;
if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
struct btrfs_bio *orig_bbio = bbio->private;
@@ -163,11 +173,30 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
struct btrfs_failed_bio *fbio = repair_bbio->private;
struct btrfs_inode *inode = repair_bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio);
+ /*
+ * We can not move forward the saved_iter, as it will be later
+ * utilized by repair_bbio again.
+ */
+ struct bvec_iter saved_iter = repair_bbio->saved_iter;
+ const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
+ const u64 logical = repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT;
+ const u32 nr_steps = repair_bbio->saved_iter.bi_size / step;
int mirror = repair_bbio->mirror_num;
+ phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
+ phys_addr_t paddr;
+ unsigned int slot = 0;
+
+ /* Repair bbio should be eaxctly one block sized. */
+ ASSERT(repair_bbio->saved_iter.bi_size == fs_info->sectorsize);
+
+ btrfs_bio_for_each_block(paddr, &repair_bbio->bio, &saved_iter, step) {
+ ASSERT(slot < nr_steps);
+ paddrs[slot] = paddr;
+ slot++;
+ }
if (repair_bbio->bio.bi_status ||
- !btrfs_data_csum_ok(repair_bbio, dev, 0, bvec_phys(bv))) {
+ !btrfs_data_csum_ok(repair_bbio, dev, 0, paddrs)) {
bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);
repair_bbio->bio.bi_iter = repair_bbio->saved_iter;
@@ -186,8 +215,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
mirror = prev_repair_mirror(fbio, mirror);
btrfs_repair_io_failure(fs_info, btrfs_ino(inode),
repair_bbio->file_offset, fs_info->sectorsize,
- repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
- bvec_phys(bv), mirror);
+ logical, paddrs, step, mirror);
} while (mirror != fbio->bbio->mirror_num);
done:
@@ -204,21 +232,25 @@ done:
*/
static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
u32 bio_offset,
- phys_addr_t paddr,
+ phys_addr_t paddrs[],
struct btrfs_failed_bio *fbio)
{
struct btrfs_inode *inode = failed_bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct folio *folio = page_folio(phys_to_page(paddr));
const u32 sectorsize = fs_info->sectorsize;
- const u32 foff = offset_in_folio(folio, paddr);
- const u64 logical = (failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT);
+ const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
+ const u32 nr_steps = sectorsize / step;
+ /*
+ * For bs > ps cases, the saved_iter can be partially moved forward.
+ * In that case we should round it down to the block boundary.
+ */
+ const u64 logical = round_down(failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
+ sectorsize);
struct btrfs_bio *repair_bbio;
struct bio *repair_bio;
int num_copies;
int mirror;
- ASSERT(foff + sectorsize <= folio_size(folio));
btrfs_debug(fs_info, "repair read error: read error at %llu",
failed_bbio->file_offset + bio_offset);
@@ -238,15 +270,22 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
atomic_inc(&fbio->repair_count);
- repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS,
+ repair_bio = bio_alloc_bioset(NULL, nr_steps, REQ_OP_READ, GFP_NOFS,
&btrfs_repair_bioset);
- repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector;
- bio_add_folio_nofail(repair_bio, folio, sectorsize, foff);
+ repair_bio->bi_iter.bi_sector = logical >> SECTOR_SHIFT;
+ for (int i = 0; i < nr_steps; i++) {
+ int ret;
+
+ ASSERT(offset_in_page(paddrs[i]) + step <= PAGE_SIZE);
+
+ ret = bio_add_page(repair_bio, phys_to_page(paddrs[i]), step,
+ offset_in_page(paddrs[i]));
+ ASSERT(ret == step);
+ }
repair_bbio = btrfs_bio(repair_bio);
- btrfs_bio_init(repair_bbio, fs_info, NULL, fbio);
- repair_bbio->inode = failed_bbio->inode;
- repair_bbio->file_offset = failed_bbio->file_offset + bio_offset;
+ btrfs_bio_init(repair_bbio, failed_bbio->inode, failed_bbio->file_offset + bio_offset,
+ NULL, fbio);
mirror = next_repair_mirror(fbio, failed_bbio->mirror_num);
btrfs_debug(fs_info, "submitting repair read to mirror %d", mirror);
@@ -258,10 +297,13 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
{
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- u32 sectorsize = fs_info->sectorsize;
+ const u32 sectorsize = fs_info->sectorsize;
+ const u32 step = min(sectorsize, PAGE_SIZE);
+ const u32 nr_steps = sectorsize / step;
struct bvec_iter *iter = &bbio->saved_iter;
blk_status_t status = bbio->bio.bi_status;
struct btrfs_failed_bio *fbio = NULL;
+ phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
phys_addr_t paddr;
u32 offset = 0;
@@ -280,13 +322,19 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
/* Clear the I/O error. A failed repair will reset it. */
bbio->bio.bi_status = BLK_STS_OK;
- btrfs_bio_for_each_block(paddr, &bbio->bio, iter, fs_info->sectorsize) {
- if (status || !btrfs_data_csum_ok(bbio, dev, offset, paddr))
- fbio = repair_one_sector(bbio, offset, paddr, fbio);
- offset += sectorsize;
+ btrfs_bio_for_each_block(paddr, &bbio->bio, iter, step) {
+ paddrs[(offset / step) % nr_steps] = paddr;
+ offset += step;
+
+ if (IS_ALIGNED(offset, sectorsize)) {
+ if (status ||
+ !btrfs_data_csum_ok(bbio, dev, offset - sectorsize, paddrs))
+ fbio = repair_one_sector(bbio, offset - sectorsize,
+ paddrs, fbio);
+ }
}
if (bbio->csum != bbio->csum_inline)
- kfree(bbio->csum);
+ kvfree(bbio->csum);
if (fbio)
btrfs_repair_done(fbio);
@@ -317,36 +365,35 @@ static struct workqueue_struct *btrfs_end_io_wq(const struct btrfs_fs_info *fs_i
return fs_info->endio_workers;
}
-static void btrfs_end_bio_work(struct work_struct *work)
+static void simple_end_io_work(struct work_struct *work)
{
struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
+ struct bio *bio = &bbio->bio;
- /* Metadata reads are checked and repaired by the submitter. */
- if (is_data_bbio(bbio))
- btrfs_check_read_bio(bbio, bbio->bio.bi_private);
- else
- btrfs_bio_end_io(bbio, bbio->bio.bi_status);
+ if (bio_op(bio) == REQ_OP_READ) {
+ /* Metadata reads are checked and repaired by the submitter. */
+ if (is_data_bbio(bbio))
+ return btrfs_check_read_bio(bbio, bbio->bio.bi_private);
+ return btrfs_bio_end_io(bbio, bbio->bio.bi_status);
+ }
+ if (bio_is_zone_append(bio) && !bio->bi_status)
+ btrfs_record_physical_zoned(bbio);
+ btrfs_bio_end_io(bbio, bbio->bio.bi_status);
}
static void btrfs_simple_end_io(struct bio *bio)
{
struct btrfs_bio *bbio = btrfs_bio(bio);
struct btrfs_device *dev = bio->bi_private;
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
btrfs_bio_counter_dec(fs_info);
if (bio->bi_status)
btrfs_log_dev_io_error(bio, dev);
- if (bio_op(bio) == REQ_OP_READ) {
- INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
- queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
- } else {
- if (bio_is_zone_append(bio) && !bio->bi_status)
- btrfs_record_physical_zoned(bbio);
- btrfs_bio_end_io(bbio, bbio->bio.bi_status);
- }
+ INIT_WORK(&bbio->end_io_work, simple_end_io_work);
+ queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
}
static void btrfs_raid56_end_io(struct bio *bio)
@@ -354,6 +401,9 @@ static void btrfs_raid56_end_io(struct bio *bio)
struct btrfs_io_context *bioc = bio->bi_private;
struct btrfs_bio *bbio = btrfs_bio(bio);
+ /* RAID56 endio is always handled in workqueue. */
+ ASSERT(in_task());
+
btrfs_bio_counter_dec(bioc->fs_info);
bbio->mirror_num = bioc->mirror_num;
if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio))
@@ -364,11 +414,12 @@ static void btrfs_raid56_end_io(struct bio *bio)
btrfs_put_bioc(bioc);
}
-static void btrfs_orig_write_end_io(struct bio *bio)
+static void orig_write_end_io_work(struct work_struct *work)
{
+ struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
+ struct bio *bio = &bbio->bio;
struct btrfs_io_stripe *stripe = bio->bi_private;
struct btrfs_io_context *bioc = stripe->bioc;
- struct btrfs_bio *bbio = btrfs_bio(bio);
btrfs_bio_counter_dec(bioc->fs_info);
@@ -393,8 +444,18 @@ static void btrfs_orig_write_end_io(struct bio *bio)
btrfs_put_bioc(bioc);
}
-static void btrfs_clone_write_end_io(struct bio *bio)
+static void btrfs_orig_write_end_io(struct bio *bio)
{
+ struct btrfs_bio *bbio = btrfs_bio(bio);
+
+ INIT_WORK(&bbio->end_io_work, orig_write_end_io_work);
+ queue_work(btrfs_end_io_wq(bbio->inode->root->fs_info, bio), &bbio->end_io_work);
+}
+
+static void clone_write_end_io_work(struct work_struct *work)
+{
+ struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
+ struct bio *bio = &bbio->bio;
struct btrfs_io_stripe *stripe = bio->bi_private;
if (bio->bi_status) {
@@ -409,6 +470,14 @@ static void btrfs_clone_write_end_io(struct bio *bio)
bio_put(bio);
}
+static void btrfs_clone_write_end_io(struct bio *bio)
+{
+ struct btrfs_bio *bbio = btrfs_bio(bio);
+
+ INIT_WORK(&bbio->end_io_work, clone_write_end_io_work);
+ queue_work(btrfs_end_io_wq(bbio->inode->root->fs_info, bio), &bbio->end_io_work);
+}
+
static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
{
if (!dev || !dev->bdev ||
@@ -455,6 +524,7 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
{
struct bio *orig_bio = bioc->orig_bio, *bio;
+ struct btrfs_bio *orig_bbio = btrfs_bio(orig_bio);
ASSERT(bio_op(orig_bio) != REQ_OP_READ);
@@ -463,8 +533,11 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
bio = orig_bio;
bio->bi_end_io = btrfs_orig_write_end_io;
} else {
- bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
+ /* We need to use endio_work to run end_io in task context. */
+ bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &btrfs_bioset);
bio_inc_remaining(orig_bio);
+ btrfs_bio_init(btrfs_bio(bio), orig_bbio->inode,
+ orig_bbio->file_offset, NULL, NULL);
bio->bi_end_io = btrfs_clone_write_end_io;
}
@@ -509,7 +582,11 @@ static int btrfs_bio_csum(struct btrfs_bio *bbio)
{
if (bbio->bio.bi_opf & REQ_META)
return btree_csum_one_bio(bbio);
- return btrfs_csum_one_bio(bbio);
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ return btrfs_csum_one_bio(bbio, true);
+#else
+ return btrfs_csum_one_bio(bbio, false);
+#endif
}
/*
@@ -581,20 +658,25 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free)
static bool should_async_write(struct btrfs_bio *bbio)
{
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
bool auto_csum_mode = true;
#ifdef CONFIG_BTRFS_EXPERIMENTAL
- struct btrfs_fs_devices *fs_devices = bbio->fs_info->fs_devices;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);
- if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_OFF)
- return false;
-
- auto_csum_mode = (csum_mode == BTRFS_OFFLOAD_CSUM_AUTO);
+ if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_ON)
+ return true;
+ /*
+ * Write bios will calculate checksum and submit bio at the same time.
+ * Unless explicitly required don't offload serial csum calculate and bio
+ * submit into a workqueue.
+ */
+ return false;
#endif
/* Submit synchronously if the checksum implementation is fast. */
- if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
+ if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
return false;
/*
@@ -605,7 +687,7 @@ static bool should_async_write(struct btrfs_bio *bbio)
return false;
/* Zoned devices require I/O to be submitted in order. */
- if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(bbio->fs_info))
+ if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(fs_info))
return false;
return true;
@@ -620,7 +702,7 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
struct btrfs_io_context *bioc,
struct btrfs_io_stripe *smap, int mirror_num)
{
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
struct async_submit_bio *async;
async = kmalloc(sizeof(*async), GFP_NOFS);
@@ -639,11 +721,12 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
{
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
unsigned int nr_segs;
int sector_offset;
- map_length = min(map_length, bbio->fs_info->max_zone_append_size);
- sector_offset = bio_split_rw_at(&bbio->bio, &bbio->fs_info->limits,
+ map_length = min(map_length, fs_info->max_zone_append_size);
+ sector_offset = bio_split_rw_at(&bbio->bio, &fs_info->limits,
&nr_segs, map_length);
if (sector_offset) {
/*
@@ -651,7 +734,7 @@ static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
* sectorsize and thus cause unaligned I/Os. Fix that by
* always rounding down to the nearest boundary.
*/
- return ALIGN_DOWN(sector_offset << SECTOR_SHIFT, bbio->fs_info->sectorsize);
+ return ALIGN_DOWN(sector_offset << SECTOR_SHIFT, fs_info->sectorsize);
}
return map_length;
}
@@ -659,7 +742,7 @@ static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
{
struct btrfs_inode *inode = bbio->inode;
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio = &bbio->bio;
u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 length = bio->bi_iter.bi_size;
@@ -670,7 +753,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
blk_status_t status;
int ret;
- if (!bbio->inode || btrfs_is_data_reloc_root(inode->root))
+ if (bbio->is_scrub || btrfs_is_data_reloc_root(inode->root))
smap.rst_search_commit_root = true;
else
smap.rst_search_commit_root = false;
@@ -684,6 +767,14 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
goto end_bbio;
}
+ /*
+ * For fscrypt writes we will get the encrypted bio after we've remapped
+ * our bio to the physical disk location, so we need to save the
+ * original bytenr so we know what we're checksumming.
+ */
+ if (bio_op(bio) == REQ_OP_WRITE && is_data_bbio(bbio))
+ bbio->orig_logical = logical;
+
map_length = min(map_length, length);
if (use_append)
map_length = btrfs_append_map_length(bbio, map_length);
@@ -734,7 +825,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
* Csum items for reloc roots have already been cloned at this
* point, so they are handled as part of the no-checksum case.
*/
- if (inode && !(inode->flags & BTRFS_INODE_NODATASUM) &&
+ if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
!test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) &&
!btrfs_is_data_reloc_root(inode->root)) {
if (should_async_write(bbio) &&
@@ -782,25 +873,27 @@ end_bbio:
static void assert_bbio_alignment(struct btrfs_bio *bbio)
{
#ifdef CONFIG_BTRFS_ASSERT
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
struct bio_vec bvec;
struct bvec_iter iter;
const u32 blocksize = fs_info->sectorsize;
+ const u32 alignment = min(blocksize, PAGE_SIZE);
+ const u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+ const u32 length = bbio->bio.bi_iter.bi_size;
- /* Metadata has no extra bs > ps alignment requirement. */
- if (!is_data_bbio(bbio))
- return;
+ /* The logical and length should still be aligned to blocksize. */
+ ASSERT(IS_ALIGNED(logical, blocksize) && IS_ALIGNED(length, blocksize) &&
+ length != 0, "root=%llu inode=%llu logical=%llu length=%u",
+ btrfs_root_id(bbio->inode->root),
+ btrfs_ino(bbio->inode), logical, length);
bio_for_each_bvec(bvec, &bbio->bio, iter)
- ASSERT(IS_ALIGNED(bvec.bv_offset, blocksize) &&
- IS_ALIGNED(bvec.bv_len, blocksize),
+ ASSERT(IS_ALIGNED(bvec.bv_offset, alignment) &&
+ IS_ALIGNED(bvec.bv_len, alignment),
"root=%llu inode=%llu logical=%llu length=%u index=%u bv_offset=%u bv_len=%u",
btrfs_root_id(bbio->inode->root),
- btrfs_ino(bbio->inode),
- bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT,
- bbio->bio.bi_iter.bi_size, iter.bi_idx,
- bvec.bv_offset,
- bvec.bv_len);
+ btrfs_ino(bbio->inode), logical, length, iter.bi_idx,
+ bvec.bv_offset, bvec.bv_len);
#endif
}
@@ -824,18 +917,36 @@ void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num)
*
* The I/O is issued synchronously to block the repair read completion from
* freeing the bio.
+ *
+ * @ino: Offending inode number
+ * @fileoff: File offset inside the inode
+ * @length: Length of the repair write
+ * @logical: Logical address of the range
+ * @paddrs: Physical address array of the content
+ * @step: Length of for each paddrs
+ * @mirror_num: Mirror number to write to. Must not be zero
*/
-int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
- u64 length, u64 logical, phys_addr_t paddr, int mirror_num)
+int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 fileoff,
+ u32 length, u64 logical, const phys_addr_t paddrs[],
+ unsigned int step, int mirror_num)
{
+ const u32 nr_steps = DIV_ROUND_UP_POW2(length, step);
struct btrfs_io_stripe smap = { 0 };
- struct bio_vec bvec;
- struct bio bio;
+ struct bio *bio = NULL;
int ret = 0;
ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
BUG_ON(!mirror_num);
+ /* Basic alignment checks. */
+ ASSERT(IS_ALIGNED(logical, fs_info->sectorsize));
+ ASSERT(IS_ALIGNED(length, fs_info->sectorsize));
+ ASSERT(IS_ALIGNED(fileoff, fs_info->sectorsize));
+ /* Either it's a single data or metadata block. */
+ ASSERT(length <= BTRFS_MAX_BLOCKSIZE);
+ ASSERT(step <= length);
+ ASSERT(is_power_of_2(step));
+
if (btrfs_repair_one_zone(fs_info, logical))
return 0;
@@ -855,24 +966,27 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
goto out_counter_dec;
}
- bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
- bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
- __bio_add_page(&bio, phys_to_page(paddr), length, offset_in_page(paddr));
- ret = submit_bio_wait(&bio);
+ bio = bio_alloc(smap.dev->bdev, nr_steps, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
+ bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
+ for (int i = 0; i < nr_steps; i++) {
+ ret = bio_add_page(bio, phys_to_page(paddrs[i]), step, offset_in_page(paddrs[i]));
+ /* We should have allocated enough slots to contain all the different pages. */
+ ASSERT(ret == step);
+ }
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
if (ret) {
/* try to remap that extent elsewhere? */
btrfs_dev_stat_inc_and_print(smap.dev, BTRFS_DEV_STAT_WRITE_ERRS);
- goto out_bio_uninit;
+ goto out_counter_dec;
}
btrfs_info_rl(fs_info,
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
- ino, start, btrfs_dev_name(smap.dev),
+ ino, fileoff, btrfs_dev_name(smap.dev),
smap.physical >> SECTOR_SHIFT);
ret = 0;
-out_bio_uninit:
- bio_uninit(&bio);
out_counter_dec:
btrfs_bio_counter_dec(fs_info);
return ret;
@@ -885,16 +999,16 @@ out_counter_dec:
*/
void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace)
{
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
u64 length = bbio->bio.bi_iter.bi_size;
struct btrfs_io_stripe smap = { 0 };
int ret;
- ASSERT(fs_info);
ASSERT(mirror_num > 0);
ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
- ASSERT(!bbio->inode);
+ ASSERT(!is_data_inode(bbio->inode));
+ ASSERT(bbio->is_scrub);
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num);