diff options
222 files changed, 4255 insertions, 1342 deletions
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst index c85cd327af28..746ea60eed3f 100644 --- a/Documentation/admin-guide/xfs.rst +++ b/Documentation/admin-guide/xfs.rst @@ -215,6 +215,14 @@ When mounting an XFS filesystem, the following options are accepted. inconsistent namespace presentation during or after a failover event. + errortag=tagname + When specified, enables the error inject tag named "tagname" with the + default frequency. Can be specified multiple times to enable multiple + errortags. Specifying this option on remount will reset the error tag + to the default value if it was set to any other value before. + This option is only supported when CONFIG_XFS_DEBUG is enabled, and + will not be reflected in /proc/self/mounts. + Deprecation of V4 Format ======================== diff --git a/block/bio.c b/block/bio.c index e726c0e280a8..40f690985bfb 100644 --- a/block/bio.c +++ b/block/bio.c @@ -311,6 +311,40 @@ void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf) } EXPORT_SYMBOL(bio_reset); +/** + * bio_reuse - reuse a bio with the payload left intact + * @bio: bio to reuse + * @opf: operation and flags for the next I/O + * + * Allow reusing an existing bio for another operation with all set up + * fields including the payload, device and end_io handler left intact. + * + * Typically used when @bio is first used to read data which is then written + * to another location without modification. @bio must not be in-flight and + * must be owned by the caller. Can't be used for cloned bios. + * + * Note: Can't be used when @bio has integrity or blk-crypto contexts for now. + * Feel free to add that support when you need it, though. 
+ */ +void bio_reuse(struct bio *bio, blk_opf_t opf) +{ + unsigned short vcnt = bio->bi_vcnt, i; + bio_end_io_t *end_io = bio->bi_end_io; + void *private = bio->bi_private; + + WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); + WARN_ON_ONCE(bio_integrity(bio)); + WARN_ON_ONCE(bio_has_crypt_ctx(bio)); + + bio_reset(bio, bio->bi_bdev, opf); + for (i = 0; i < vcnt; i++) + bio->bi_iter.bi_size += bio->bi_io_vec[i].bv_len; + bio->bi_vcnt = vcnt; + bio->bi_private = private; + bio->bi_end_io = end_io; +} +EXPORT_SYMBOL_GPL(bio_reuse); + static struct bio *__bio_chain_endio(struct bio *bio) { struct bio *parent = bio->bi_private; diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 5bf501cf8271..9f7133e02576 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -88,6 +88,7 @@ xfs-y += xfs_aops.o \ xfs_globals.o \ xfs_handle.o \ xfs_health.o \ + xfs_healthmon.o \ xfs_icache.o \ xfs_ioctl.o \ xfs_iomap.o \ @@ -105,6 +106,7 @@ xfs-y += xfs_aops.o \ xfs_symlink.o \ xfs_sysfs.o \ xfs_trans.o \ + xfs_verify_media.o \ xfs_xattr.o # low-level transaction/log code diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index e6ba914f6d06..586918ed1cbf 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -5,7 +5,7 @@ * All rights reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 8ac8230c3d3c..c4cdcc570d61 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <darrick.wong@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index ad381c73abc4..d99602bcc16f 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" @@ -376,8 +376,8 @@ xfs_alloc_compute_diff( xfs_agblock_t freeend; /* end of freespace extent */ xfs_agblock_t newbno1; /* return block number */ xfs_agblock_t newbno2; /* other new block number */ - xfs_extlen_t newlen1=0; /* length with newbno1 */ - xfs_extlen_t newlen2=0; /* length with newbno2 */ + xfs_extlen_t newlen1 = 0; /* length with newbno1 */ + xfs_extlen_t newlen2 = 0; /* length with newbno2 */ xfs_agblock_t wantend; /* end of target extent */ bool userdata = datatype & XFS_ALLOC_USERDATA; @@ -577,8 +577,8 @@ xfs_alloc_fixup_trees( int i; /* operation results */ xfs_agblock_t nfbno1; /* first new free startblock */ xfs_agblock_t nfbno2; /* second new free startblock */ - xfs_extlen_t nflen1=0; /* first new free length */ - xfs_extlen_t nflen2=0; /* second new free length */ + xfs_extlen_t nflen1 = 0; /* first new free length */ + xfs_extlen_t nflen2 = 0; /* second new free length */ struct xfs_mount *mp; bool fixup_longest = false; diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index fa1f03c1331e..29f6ec1c3f6f 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 8c04acd30d48..93caa1dae501 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -50,7 +50,6 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args); */ STATIC int xfs_attr_leaf_get(xfs_da_args_t *args); STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); -STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp); /* * Internal routines when attribute list is more than one block. @@ -351,16 +350,14 @@ xfs_attr_set_resv( */ STATIC int xfs_attr_try_sf_addname( - struct xfs_inode *dp, struct xfs_da_args *args) { - int error; /* * Build initial attribute list (if required). */ - if (dp->i_af.if_format == XFS_DINODE_FMT_EXTENTS) + if (args->dp->i_af.if_format == XFS_DINODE_FMT_EXTENTS) xfs_attr_shortform_create(args); error = xfs_attr_shortform_addname(args); @@ -372,9 +369,9 @@ xfs_attr_try_sf_addname( * NOTE: this is also the error path (EEXIST, etc). 
*/ if (!error) - xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(args->trans, args->dp, XFS_ICHGTIME_CHG); - if (xfs_has_wsync(dp->i_mount)) + if (xfs_has_wsync(args->dp->i_mount)) xfs_trans_set_sync(args->trans); return error; @@ -385,10 +382,9 @@ xfs_attr_sf_addname( struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; - struct xfs_inode *dp = args->dp; int error = 0; - error = xfs_attr_try_sf_addname(dp, args); + error = xfs_attr_try_sf_addname(args); if (error != -ENOSPC) { ASSERT(!error || error == -EEXIST); attr->xattri_dela_state = XFS_DAS_DONE; @@ -979,11 +975,12 @@ xfs_attr_lookup( return error; if (xfs_attr_is_leaf(dp)) { - error = xfs_attr_leaf_hasname(args, &bp); - - if (bp) - xfs_trans_brelse(args->trans, bp); - + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, + 0, &bp); + if (error) + return error; + error = xfs_attr3_leaf_lookup_int(bp, args); + xfs_trans_brelse(args->trans, bp); return error; } @@ -1032,6 +1029,95 @@ trans_cancel: } /* + * Decide if it is theoretically possible to try to bypass the attr intent + * mechanism for better performance. Other constraints (e.g. available space + * in the existing structure) are not considered here. + */ +static inline bool +xfs_attr_can_shortcut( + const struct xfs_inode *ip) +{ + return xfs_inode_has_attr_fork(ip) && xfs_attr_is_shortform(ip); +} + +/* Try to set an attr in one transaction or fall back to attr intents. */ +int +xfs_attr_setname( + struct xfs_da_args *args, + int rmt_blks) +{ + int error; + + if (!rmt_blks && xfs_attr_can_shortcut(args->dp)) { + args->op_flags |= XFS_DA_OP_ADDNAME; + + error = xfs_attr_try_sf_addname(args); + if (error != -ENOSPC) + return error; + } + + xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET); + return 0; +} + +/* Try to remove an attr in one transaction or fall back to attr intents. 
*/ +int +xfs_attr_removename( + struct xfs_da_args *args) +{ + if (xfs_attr_can_shortcut(args->dp)) + return xfs_attr_sf_removename(args); + + xfs_attr_defer_add(args, XFS_ATTR_DEFER_REMOVE); + return 0; +} + +/* Try to replace an attr in one transaction or fall back to attr intents. */ +int +xfs_attr_replacename( + struct xfs_da_args *args, + int rmt_blks) +{ + int error; + + if (rmt_blks || !xfs_attr_can_shortcut(args->dp)) { + xfs_attr_defer_add(args, XFS_ATTR_DEFER_REPLACE); + return 0; + } + + error = xfs_attr_shortform_replace(args); + if (error != -ENOSPC) + return error; + + args->op_flags |= XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE; + + error = xfs_attr_sf_removename(args); + if (error) + return error; + + if (args->attr_filter & XFS_ATTR_PARENT) { + /* + * Move the new name/value to the regular name/value slots and + * zero out the new name/value slots because we don't need to + * log them for a PPTR_SET operation. + */ + xfs_attr_update_pptr_replace_args(args); + args->new_name = NULL; + args->new_namelen = 0; + args->new_value = NULL; + args->new_valuelen = 0; + } + args->op_flags &= ~XFS_DA_OP_REPLACE; + + error = xfs_attr_try_sf_addname(args); + if (error != -ENOSPC) + return error; + + xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET); + return 0; +} + +/* * Make a change to the xattr structure. * * The caller must have initialized @args, attached dquots, and must not hold @@ -1111,14 +1197,19 @@ xfs_attr_set( case -EEXIST: if (op == XFS_ATTRUPDATE_REMOVE) { /* if no value, we are performing a remove operation */ - xfs_attr_defer_add(args, XFS_ATTR_DEFER_REMOVE); + error = xfs_attr_removename(args); + if (error) + goto out_trans_cancel; break; } /* Pure create fails if the attr already exists */ if (op == XFS_ATTRUPDATE_CREATE) goto out_trans_cancel; - xfs_attr_defer_add(args, XFS_ATTR_DEFER_REPLACE); + + error = xfs_attr_replacename(args, rmt_blks); + if (error) + goto out_trans_cancel; break; case -ENOATTR: /* Can't remove what isn't there. 
*/ @@ -1128,7 +1219,10 @@ xfs_attr_set( /* Pure replace fails if no existing attr to replace. */ if (op == XFS_ATTRUPDATE_REPLACE) goto out_trans_cancel; - xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET); + + error = xfs_attr_setname(args, rmt_blks); + if (error) + goto out_trans_cancel; break; default: goto out_trans_cancel; @@ -1223,27 +1317,6 @@ xfs_attr_shortform_addname( *========================================================================*/ /* - * Return EEXIST if attr is found, or ENOATTR if not - */ -STATIC int -xfs_attr_leaf_hasname( - struct xfs_da_args *args, - struct xfs_buf **bp) -{ - int error = 0; - - error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, bp); - if (error) - return error; - - error = xfs_attr3_leaf_lookup_int(*bp, args); - if (error != -ENOATTR && error != -EEXIST) - xfs_trans_brelse(args->trans, *bp); - - return error; -} - -/* * Remove a name from the leaf attribute list structure * * This leaf block cannot have a "remote" value, we only call this routine @@ -1253,25 +1326,22 @@ STATIC int xfs_attr_leaf_removename( struct xfs_da_args *args) { - struct xfs_inode *dp; - struct xfs_buf *bp; + struct xfs_inode *dp = args->dp; int error, forkoff; + struct xfs_buf *bp; trace_xfs_attr_leaf_removename(args); - /* - * Remove the attribute. - */ - dp = args->dp; - - error = xfs_attr_leaf_hasname(args, &bp); - if (error == -ENOATTR) { + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp); + if (error) + return error; + error = xfs_attr3_leaf_lookup_int(bp, args); + if (error != -EEXIST) { xfs_trans_brelse(args->trans, bp); - if (args->op_flags & XFS_DA_OP_RECOVERY) + if (error == -ENOATTR && (args->op_flags & XFS_DA_OP_RECOVERY)) return 0; return error; - } else if (error != -EEXIST) - return error; + } xfs_attr3_leaf_remove(bp, args); @@ -1295,23 +1365,20 @@ xfs_attr_leaf_removename( * Returns 0 on successful retrieval, otherwise an error. 
*/ STATIC int -xfs_attr_leaf_get(xfs_da_args_t *args) +xfs_attr_leaf_get( + struct xfs_da_args *args) { - struct xfs_buf *bp; - int error; + struct xfs_buf *bp; + int error; trace_xfs_attr_leaf_get(args); - error = xfs_attr_leaf_hasname(args, &bp); - - if (error == -ENOATTR) { - xfs_trans_brelse(args->trans, bp); - return error; - } else if (error != -EEXIST) + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp); + if (error) return error; - - - error = xfs_attr3_leaf_getvalue(bp, args); + error = xfs_attr3_leaf_lookup_int(bp, args); + if (error == -EEXIST) + error = xfs_attr3_leaf_getvalue(bp, args); xfs_trans_brelse(args->trans, bp); return error; } diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 0e51d0723f9a..8244305949de 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -573,7 +573,7 @@ struct xfs_trans_res xfs_attr_set_resv(const struct xfs_da_args *args); */ static inline bool xfs_attr_is_shortform( - struct xfs_inode *ip) + const struct xfs_inode *ip) { return ip->i_af.if_format == XFS_DINODE_FMT_LOCAL || (ip->i_af.if_format == XFS_DINODE_FMT_EXTENTS && @@ -649,4 +649,8 @@ void xfs_attr_intent_destroy_cache(void); int xfs_attr_sf_totsize(struct xfs_inode *dp); int xfs_attr_add_fork(struct xfs_inode *ip, int size, int rsvd); +int xfs_attr_setname(struct xfs_da_args *args, int rmt_blks); +int xfs_attr_removename(struct xfs_da_args *args); +int xfs_attr_replacename(struct xfs_da_args *args, int rmt_blks); + #endif /* __XFS_ATTR_H__ */ diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 91c1b30ebaab..47f48ae555c0 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -75,6 +75,59 @@ STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args, int move_count); STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); +/* Compute the byte offset of the end of the leaf entry array. */ +static inline int +xfs_attr_leaf_entries_end( + unsigned int hdrcount, + const struct xfs_attr_leafblock *leaf) +{ + return hdrcount * sizeof(struct xfs_attr_leaf_entry) + + xfs_attr3_leaf_hdr_size(leaf); +} + +static inline bool +ichdr_freemaps_overlap( + const struct xfs_attr3_icleaf_hdr *ichdr, + unsigned int x, + unsigned int y) +{ + const unsigned int xend = + ichdr->freemap[x].base + ichdr->freemap[x].size; + const unsigned int yend = + ichdr->freemap[y].base + ichdr->freemap[y].size; + + /* empty slots do not overlap */ + if (!ichdr->freemap[x].size || !ichdr->freemap[y].size) + return false; + + return ichdr->freemap[x].base < yend && xend > ichdr->freemap[y].base; +} + +static inline xfs_failaddr_t +xfs_attr_leaf_ichdr_freemaps_verify( + const struct xfs_attr3_icleaf_hdr *ichdr, + const struct xfs_attr_leafblock *leaf) +{ + unsigned int entries_end = + xfs_attr_leaf_entries_end(ichdr->count, leaf); + int i; + + if (ichdr_freemaps_overlap(ichdr, 0, 1)) + return __this_address; + if (ichdr_freemaps_overlap(ichdr, 0, 2)) + return __this_address; + if (ichdr_freemaps_overlap(ichdr, 1, 2)) + return __this_address; + + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + if (ichdr->freemap[i].size > 0 && + ichdr->freemap[i].base < entries_end) + return __this_address; + } + + return NULL; +} + /* * attr3 block 'firstused' conversion helpers. 
* @@ -218,6 +271,8 @@ xfs_attr3_leaf_hdr_to_disk( hdr3->freemap[i].base = cpu_to_be16(from->freemap[i].base); hdr3->freemap[i].size = cpu_to_be16(from->freemap[i].size); } + + ASSERT(xfs_attr_leaf_ichdr_freemaps_verify(from, to) == NULL); return; } to->hdr.info.forw = cpu_to_be32(from->forw); @@ -233,6 +288,8 @@ xfs_attr3_leaf_hdr_to_disk( to->hdr.freemap[i].base = cpu_to_be16(from->freemap[i].base); to->hdr.freemap[i].size = cpu_to_be16(from->freemap[i].size); } + + ASSERT(xfs_attr_leaf_ichdr_freemaps_verify(from, to) == NULL); } static xfs_failaddr_t @@ -385,6 +442,10 @@ xfs_attr3_leaf_verify( return __this_address; } + fa = xfs_attr_leaf_ichdr_freemaps_verify(&ichdr, leaf); + if (fa) + return fa; + return NULL; } @@ -782,6 +843,44 @@ xfs_attr_sf_findname( } /* + * Replace a shortform xattr if it's the right length. Returns 0 on success, + * -ENOSPC if the length is wrong, or -ENOATTR if the attr was not found. + */ +int +xfs_attr_shortform_replace( + struct xfs_da_args *args) +{ + struct xfs_attr_sf_entry *sfe; + + ASSERT(args->dp->i_af.if_format == XFS_DINODE_FMT_LOCAL); + + trace_xfs_attr_sf_replace(args); + + sfe = xfs_attr_sf_findname(args); + if (!sfe) + return -ENOATTR; + + if (args->attr_filter & XFS_ATTR_PARENT) { + if (sfe->namelen != args->new_namelen || + sfe->valuelen != args->new_valuelen) + return -ENOSPC; + + memcpy(sfe->nameval, args->new_name, sfe->namelen); + memcpy(&sfe->nameval[sfe->namelen], args->new_value, + sfe->valuelen); + } else { + if (sfe->valuelen != args->valuelen) + return -ENOSPC; + memcpy(&sfe->nameval[sfe->namelen], args->value, + sfe->valuelen); + } + + xfs_trans_log_inode(args->trans, args->dp, + XFS_ILOG_CORE | XFS_ILOG_ADATA); + return 0; +} + +/* * Add a name/value pair to the shortform attribute list. * Overflow from the inode has already been checked for. */ @@ -1409,8 +1508,7 @@ xfs_attr3_leaf_add( * Search through freemap for first-fit on new name length. 
* (may need to figure in size of entry struct too) */ - tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf); + tablesize = xfs_attr_leaf_entries_end(ichdr.count + 1, leaf); for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) { if (tablesize > ichdr.firstused) { sum += ichdr.freemap[i].size; @@ -1476,6 +1574,7 @@ xfs_attr3_leaf_add_work( struct xfs_attr_leaf_name_local *name_loc; struct xfs_attr_leaf_name_remote *name_rmt; struct xfs_mount *mp; + int old_end, new_end; int tmp; int i; @@ -1568,17 +1667,48 @@ xfs_attr3_leaf_add_work( if (be16_to_cpu(entry->nameidx) < ichdr->firstused) ichdr->firstused = be16_to_cpu(entry->nameidx); - ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf)); - tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf); + new_end = xfs_attr_leaf_entries_end(ichdr->count, leaf); + old_end = new_end - sizeof(struct xfs_attr_leaf_entry); + + ASSERT(ichdr->firstused >= new_end); for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - if (ichdr->freemap[i].base == tmp) { - ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t); + int diff = 0; + + if (ichdr->freemap[i].base == old_end) { + /* + * This freemap entry starts at the old end of the + * leaf entry array, so we need to adjust its base + * upward to accommodate the larger array. + */ + diff = sizeof(struct xfs_attr_leaf_entry); + } else if (ichdr->freemap[i].size > 0 && + ichdr->freemap[i].base < new_end) { + /* + * This freemap entry starts in the space claimed by + * the new leaf entry. Adjust its base upward to + * reflect that. 
+ */ + diff = new_end - ichdr->freemap[i].base; + } + + if (diff) { + ichdr->freemap[i].base += diff; ichdr->freemap[i].size -= - min_t(uint16_t, ichdr->freemap[i].size, - sizeof(xfs_attr_leaf_entry_t)); + min_t(uint16_t, ichdr->freemap[i].size, diff); + } + + /* + * Don't leave zero-length freemaps with nonzero base lying + * around, because we don't want the code in _remove that + * matches on base address to get confused and create + * overlapping freemaps. If we end up with no freemap entries + * then the next _add will compact the leaf block and + * regenerate the freemaps. + */ + if (ichdr->freemap[i].size == 0 && ichdr->freemap[i].base > 0) { + ichdr->freemap[i].base = 0; + ichdr->holes = 1; } } ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index); @@ -1623,6 +1753,10 @@ xfs_attr3_leaf_compact( ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src); ichdr_dst->freemap[0].size = ichdr_dst->firstused - ichdr_dst->freemap[0].base; + ichdr_dst->freemap[1].base = 0; + ichdr_dst->freemap[2].base = 0; + ichdr_dst->freemap[1].size = 0; + ichdr_dst->freemap[2].size = 0; /* write the header back to initialise the underlying buffer */ xfs_attr3_leaf_hdr_to_disk(args->geo, leaf_dst, ichdr_dst); @@ -1774,8 +1908,8 @@ xfs_attr3_leaf_rebalance( /* * leaf2 is the destination, compact it if it looks tight. */ - max = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1); - max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t); + max = ichdr2.firstused - + xfs_attr_leaf_entries_end(ichdr2.count, leaf1); if (space > max) xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp); @@ -1803,8 +1937,8 @@ xfs_attr3_leaf_rebalance( /* * leaf1 is the destination, compact it if it looks tight. 
*/ - max = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1); - max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t); + max = ichdr1.firstused - + xfs_attr_leaf_entries_end(ichdr1.count, leaf1); if (space > max) xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp); @@ -2010,9 +2144,7 @@ xfs_attr3_leaf_toosmall( blk = &state->path.blk[ state->path.active-1 ]; leaf = blk->bp->b_addr; xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr, leaf); - bytes = xfs_attr3_leaf_hdr_size(leaf) + - ichdr.count * sizeof(xfs_attr_leaf_entry_t) + - ichdr.usedbytes; + bytes = xfs_attr_leaf_entries_end(ichdr.count, leaf) + ichdr.usedbytes; if (bytes > (state->args->geo->blksize >> 1)) { *action = 0; /* blk over 50%, don't try to join */ return 0; @@ -2070,9 +2202,8 @@ xfs_attr3_leaf_toosmall( bytes = state->args->geo->blksize - (state->args->geo->blksize >> 2) - ichdr.usedbytes - ichdr2.usedbytes - - ((ichdr.count + ichdr2.count) * - sizeof(xfs_attr_leaf_entry_t)) - - xfs_attr3_leaf_hdr_size(leaf); + xfs_attr_leaf_entries_end(ichdr.count + ichdr2.count, + leaf); xfs_trans_brelse(state->args->trans, bp); if (bytes >= 0) @@ -2134,8 +2265,7 @@ xfs_attr3_leaf_remove( ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8); ASSERT(args->index >= 0 && args->index < ichdr.count); - ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) + - xfs_attr3_leaf_hdr_size(leaf)); + ASSERT(ichdr.firstused >= xfs_attr_leaf_entries_end(ichdr.count, leaf)); entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; @@ -2148,8 +2278,7 @@ xfs_attr3_leaf_remove( * find smallest free region in case we need to replace it, * adjust any map that borders the entry table, */ - tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf); + tablesize = xfs_attr_leaf_entries_end(ichdr.count, leaf); tmp = ichdr.freemap[0].size; before = after = -1; smallest = XFS_ATTR_LEAF_MAPSIZE - 1; @@ -2256,8 +2385,7 @@ xfs_attr3_leaf_remove( * Check if leaf is less than 50% full, caller may want to * 
"join" the leaf with a sibling if so. */ - tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) + - ichdr.count * sizeof(xfs_attr_leaf_entry_t); + tmp = ichdr.usedbytes + xfs_attr_leaf_entries_end(ichdr.count, leaf); return tmp < args->geo->magicpct; /* leaf is < 37% full */ } @@ -2580,11 +2708,11 @@ xfs_attr3_leaf_moveents( ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC); ASSERT(ichdr_s->magic == ichdr_d->magic); ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8); - ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s)) - + xfs_attr3_leaf_hdr_size(leaf_s)); + ASSERT(ichdr_s->firstused >= + xfs_attr_leaf_entries_end(ichdr_s->count, leaf_s)); ASSERT(ichdr_d->count < args->geo->blksize / 8); - ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d)) - + xfs_attr3_leaf_hdr_size(leaf_d)); + ASSERT(ichdr_d->firstused >= + xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d)); ASSERT(start_s < ichdr_s->count); ASSERT(start_d <= ichdr_d->count); @@ -2644,8 +2772,7 @@ xfs_attr3_leaf_moveents( ichdr_d->usedbytes += tmp; ichdr_s->count -= 1; ichdr_d->count += 1; - tmp = ichdr_d->count * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf_d); + tmp = xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d); ASSERT(ichdr_d->firstused >= tmp); #ifdef GROT } @@ -2681,8 +2808,8 @@ xfs_attr3_leaf_moveents( /* * Fill in the freemap information */ - ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_d); - ichdr_d->freemap[0].base += ichdr_d->count * sizeof(xfs_attr_leaf_entry_t); + ichdr_d->freemap[0].base = + xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d); ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base; ichdr_d->freemap[1].base = 0; ichdr_d->freemap[2].base = 0; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 589f810eedc0..aca46da2bc50 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -46,6 +46,7 @@ struct xfs_attr3_icleaf_hdr { * Internal routines when 
attribute fork size < XFS_LITINO(mp). */ void xfs_attr_shortform_create(struct xfs_da_args *args); +int xfs_attr_shortform_replace(struct xfs_da_args *args); void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); int xfs_attr_shortform_getvalue(struct xfs_da_args *args); int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index bff3dc226f81..e6c8dd1a997a 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c index 40ce5f3094d1..f05a07c0f75d 100644 --- a/fs/xfs/libxfs/xfs_bit.c +++ b/fs/xfs/libxfs/xfs_bit.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_log_format.h" #include "xfs_bit.h" diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 53ef4b7e504d..7a4c8f1aa76c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 188feac04b60..1c7165df483a 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index dbe9df8c3300..7012f3570c8d 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_btree_mem.c b/fs/xfs/libxfs/xfs_btree_mem.c index f2f7b4305413..37136a70e56d 100644 --- a/fs/xfs/libxfs/xfs_btree_mem.c +++ b/fs/xfs/libxfs/xfs_btree_mem.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c index 5ed84f9cc877..4300c058807b 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.c +++ b/fs/xfs/libxfs/xfs_btree_staging.c @@ -3,7 +3,7 @@ * Copyright (C) 2020 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 90f7fc219fcc..766631f0562e 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 86de99e2f757..7d55307e619f 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -746,7 +746,7 @@ struct xfs_attr3_leafblock { #define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t)) static inline int -xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp) +xfs_attr3_leaf_hdr_size(const struct xfs_attr_leafblock *leafp) { if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) return sizeof(struct xfs_attr3_leaf_hdr); diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 5b377cbbb1f7..c39e40dcb0b0 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -565,7 +565,7 @@ xfs_defer_relog( continue; trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp); - XFS_STATS_INC((*tpp)->t_mountp, defer_relog); + XFS_STATS_INC((*tpp)->t_mountp, xs_defer_relog); xfs_defer_relog_intent(*tpp, dfp); } diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 82a338458a51..107c1a5b8a96 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 0f93ed1a4a74..6d70e6b429e7 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index a16b05c43e2e..80ba94f51e5c 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index 71c2f22a3f6e..bc909543eb74 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index fe8d4fa13128..ed0b5287a44f 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 17a20384c8b7..1a67cdd6a707 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index dceef2abd4e2..ce767b40482f 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index 57e47077c75a..6de207fed2d8 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -53,7 +53,7 @@ * Drop-writes support removed because write error handling cannot trash * pre-existing delalloc extents in any useful way anymore. We retain the * definition so that we can reject it as an invalid value in - * xfs_errortag_valid(). + * xfs_errortag_add(). */ #define XFS_ERRTAG_DROP_WRITES 28 #define XFS_ERRTAG_LOG_BAD_CRC 29 @@ -74,7 +74,8 @@ #define XFS_ERRTAG_EXCHMAPS_FINISH_ONE 44 #define XFS_ERRTAG_METAFILE_RESV_CRITICAL 45 #define XFS_ERRTAG_FORCE_ZERO_RANGE 46 -#define XFS_ERRTAG_MAX 47 +#define XFS_ERRTAG_ZONE_RESET 47 +#define XFS_ERRTAG_MAX 48 /* * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. @@ -135,7 +136,8 @@ XFS_ERRTAG(WB_DELAY_MS, wb_delay_ms, 3000) \ XFS_ERRTAG(WRITE_DELAY_MS, write_delay_ms, 3000) \ XFS_ERRTAG(EXCHMAPS_FINISH_ONE, exchmaps_finish_one, 1) \ XFS_ERRTAG(METAFILE_RESV_CRITICAL, metafile_resv_crit, 4) \ -XFS_ERRTAG(FORCE_ZERO_RANGE, force_zero_range, 4) +XFS_ERRTAG(FORCE_ZERO_RANGE, force_zero_range, 4) \ +XFS_ERRTAG(ZONE_RESET, zone_reset, 1) #endif /* XFS_ERRTAG */ #endif /* __XFS_ERRORTAG_H_ */ diff --git a/fs/xfs/libxfs/xfs_exchmaps.c b/fs/xfs/libxfs/xfs_exchmaps.c index 932ee4619e9e..5d28f4eac527 100644 --- a/fs/xfs/libxfs/xfs_exchmaps.c +++ b/fs/xfs/libxfs/xfs_exchmaps.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 12463ba766da..d165de607d17 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1003,6 +1003,191 @@ struct xfs_rtgroup_geometry { #define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */ #define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4) /* reference counts */ +/* Health monitor event domains */ + +/* affects the whole fs */ +#define XFS_HEALTH_MONITOR_DOMAIN_MOUNT (0) + +/* metadata health events */ +#define XFS_HEALTH_MONITOR_DOMAIN_FS (1) +#define XFS_HEALTH_MONITOR_DOMAIN_AG (2) +#define XFS_HEALTH_MONITOR_DOMAIN_INODE (3) +#define XFS_HEALTH_MONITOR_DOMAIN_RTGROUP (4) + +/* disk events */ +#define XFS_HEALTH_MONITOR_DOMAIN_DATADEV (5) +#define XFS_HEALTH_MONITOR_DOMAIN_RTDEV (6) +#define XFS_HEALTH_MONITOR_DOMAIN_LOGDEV (7) + +/* file range events */ +#define XFS_HEALTH_MONITOR_DOMAIN_FILERANGE (8) + +/* Health monitor event types */ + +/* status of the monitor itself */ +#define XFS_HEALTH_MONITOR_TYPE_RUNNING (0) +#define XFS_HEALTH_MONITOR_TYPE_LOST (1) + +/* filesystem was unmounted */ +#define XFS_HEALTH_MONITOR_TYPE_UNMOUNT (2) + +/* metadata health events */ +#define XFS_HEALTH_MONITOR_TYPE_SICK (3) +#define XFS_HEALTH_MONITOR_TYPE_CORRUPT (4) +#define XFS_HEALTH_MONITOR_TYPE_HEALTHY (5) + +/* filesystem shutdown */ +#define XFS_HEALTH_MONITOR_TYPE_SHUTDOWN (6) + +/* media errors */ +#define XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR (7) + +/* pagecache I/O to a file range failed */ +#define XFS_HEALTH_MONITOR_TYPE_BUFREAD (8) +#define XFS_HEALTH_MONITOR_TYPE_BUFWRITE (9) + +/* direct I/O to a file range failed */ +#define XFS_HEALTH_MONITOR_TYPE_DIOREAD (10) +#define XFS_HEALTH_MONITOR_TYPE_DIOWRITE (11) + +/* out of band media error reported for a file range */ +#define XFS_HEALTH_MONITOR_TYPE_DATALOST (12) + +/* lost events */ 
+struct xfs_health_monitor_lost { + __u64 count; +}; + +/* fs/rt metadata */ +struct xfs_health_monitor_fs { + /* XFS_FSOP_GEOM_SICK_* flags */ + __u32 mask; +}; + +/* ag/rtgroup metadata */ +struct xfs_health_monitor_group { + /* XFS_{AG,RTGROUP}_SICK_* flags */ + __u32 mask; + __u32 gno; +}; + +/* inode metadata */ +struct xfs_health_monitor_inode { + /* XFS_BS_SICK_* flags */ + __u32 mask; + __u32 gen; + __u64 ino; +}; + +/* shutdown reasons */ +#define XFS_HEALTH_SHUTDOWN_META_IO_ERROR (1u << 0) +#define XFS_HEALTH_SHUTDOWN_LOG_IO_ERROR (1u << 1) +#define XFS_HEALTH_SHUTDOWN_FORCE_UMOUNT (1u << 2) +#define XFS_HEALTH_SHUTDOWN_CORRUPT_INCORE (1u << 3) +#define XFS_HEALTH_SHUTDOWN_CORRUPT_ONDISK (1u << 4) +#define XFS_HEALTH_SHUTDOWN_DEVICE_REMOVED (1u << 5) + +/* shutdown */ +struct xfs_health_monitor_shutdown { + /* XFS_HEALTH_SHUTDOWN_* flags */ + __u32 reasons; +}; + +/* file range events */ +struct xfs_health_monitor_filerange { + __u64 pos; + __u64 len; + __u64 ino; + __u32 gen; + __u32 error; +}; + +/* disk media errors */ +struct xfs_health_monitor_media { + __u64 daddr; + __u64 bbcount; +}; + +struct xfs_health_monitor_event { + /* XFS_HEALTH_MONITOR_DOMAIN_* */ + __u32 domain; + + /* XFS_HEALTH_MONITOR_TYPE_* */ + __u32 type; + + /* Timestamp of the event, in nanoseconds since the Unix epoch */ + __u64 time_ns; + + /* + * Details of the event. The primary clients are written in python + * and rust, so break this up because bindgen hates anonymous structs + * and unions. 
+ */ + union { + struct xfs_health_monitor_lost lost; + struct xfs_health_monitor_fs fs; + struct xfs_health_monitor_group group; + struct xfs_health_monitor_inode inode; + struct xfs_health_monitor_shutdown shutdown; + struct xfs_health_monitor_media media; + struct xfs_health_monitor_filerange filerange; + } e; + + /* zeroes */ + __u64 pad[2]; +}; + +struct xfs_health_monitor { + __u64 flags; /* flags */ + __u8 format; /* output format */ + __u8 pad[23]; /* zeroes */ +}; + +/* Return all health status events, not just deltas */ +#define XFS_HEALTH_MONITOR_VERBOSE (1ULL << 0) + +#define XFS_HEALTH_MONITOR_ALL (XFS_HEALTH_MONITOR_VERBOSE) + +/* Initial return format version */ +#define XFS_HEALTH_MONITOR_FMT_V0 (0) + +/* + * Check that a given fd points to the same filesystem that the health monitor + * is monitoring. + */ +struct xfs_health_file_on_monitored_fs { + __s32 fd; + __u32 flags; /* zero for now */ +}; + +/* Verify the media of the underlying devices */ +struct xfs_verify_media { + __u32 me_dev; /* I: XFS_DEV_{DATA,LOG,RT} */ + __u32 me_flags; /* I: XFS_VERIFY_MEDIA_* */ + + /* + * IO: inclusive start of disk range to verify, in 512b blocks. + * Will be adjusted upwards as media verification succeeds. + */ + __u64 me_start_daddr; + + /* + * IO: exclusive end of the disk range to verify, in 512b blocks. + * Can be adjusted downwards to match device size. 
+ */ + __u64 me_end_daddr; + + __u32 me_ioerror; /* O: I/O error (positive) */ + __u32 me_max_io_size; /* I: maximum IO size in bytes */ + + __u32 me_rest_us; /* I: rest time between IOs, usecs */ + __u32 me_pad; /* zero */ +}; + +#define XFS_VERIFY_MEDIA_REPORT (1 << 0) /* report to fsnotify */ + +#define XFS_VERIFY_MEDIA_FLAGS (XFS_VERIFY_MEDIA_REPORT) + /* * ioctl commands that are used by Linux filesystems */ @@ -1042,6 +1227,10 @@ struct xfs_rtgroup_geometry { #define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle) #define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head) #define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 65, struct xfs_rtgroup_geometry) +#define XFS_IOC_HEALTH_MONITOR _IOW ('X', 68, struct xfs_health_monitor) +#define XFS_IOC_HEALTH_FD_ON_MONITORED_FS \ + _IOW ('X', 69, struct xfs_health_file_on_monitored_fs) +#define XFS_IOC_VERIFY_MEDIA _IOWR('X', 70, struct xfs_verify_media) /* * ioctl commands that replace IRIX syssgi()'s diff --git a/fs/xfs/libxfs/xfs_group.c b/fs/xfs/libxfs/xfs_group.c index 792f76d2e2a0..2ff9d1e56b47 100644 --- a/fs/xfs/libxfs/xfs_group.c +++ b/fs/xfs/libxfs/xfs_group.c @@ -3,7 +3,7 @@ * Copyright (c) 2018 Red Hat, Inc. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index b31000f7190c..1d45cf5789e8 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -289,4 +289,9 @@ void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs); #define xfs_metadata_is_sick(error) \ (unlikely((error) == -EFSCORRUPTED || (error) == -EFSBADCRC)) +unsigned int xfs_healthmon_inode_mask(unsigned int sick_mask); +unsigned int xfs_healthmon_rtgroup_mask(unsigned int sick_mask); +unsigned int xfs_healthmon_perag_mask(unsigned int sick_mask); +unsigned int xfs_healthmon_fs_mask(unsigned int sick_mask); + #endif /* __XFS_HEALTH_H__ */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index c19d6d713780..dcef06ec0a02 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 100afdd66cdd..1376e8630449 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 8796f2b3e534..5b2b926ab228 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -3,7 +3,7 @@ * Copyright (c) 2017 Christoph Hellwig. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_bit.h" diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index b1812b2c3cce..a017016e9075 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 1772d82f2d68..d14a7f2f4c03 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -4,7 +4,7 @@ * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c index 309ce6dd5553..551fa51befb6 100644 --- a/fs/xfs/libxfs/xfs_inode_util.c +++ b/fs/xfs/libxfs/xfs_inode_util.c @@ -4,7 +4,7 @@ * All Rights Reserved. */ #include <linux/iversion.h> -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 908e7060428c..3f5a24dda907 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -184,13 +184,6 @@ struct xlog_rec_header { #define XLOG_REC_SIZE_OTHER offsetofend(struct xlog_rec_header, h_size) #endif /* __i386__ */ -/* not an on-disk structure, but needed by log recovery in userspace */ -struct xfs_log_iovec { - void *i_addr; /* beginning address of region */ - int i_len; /* length in bytes of region */ - uint i_type; /* type of region */ -}; - /* * Transaction Header definitions. 
* diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index 34bba96d30ca..37712b2f8757 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -3,7 +3,7 @@ * Copyright (c) 2013 Jie Liu. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_metadir.c b/fs/xfs/libxfs/xfs_metadir.c index 178e89711cb7..3e5c61188927 100644 --- a/fs/xfs/libxfs/xfs_metadir.c +++ b/fs/xfs/libxfs/xfs_metadir.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c index b02e3d6c0868..cf239f862212 100644 --- a/fs/xfs/libxfs/xfs_metafile.c +++ b/fs/xfs/libxfs/xfs_metafile.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c index 69366c44a701..3509cc4b2175 100644 --- a/fs/xfs/libxfs/xfs_parent.c +++ b/fs/xfs/libxfs/xfs_parent.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. * All rights reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_da_format.h" @@ -29,6 +29,7 @@ #include "xfs_trans_space.h" #include "xfs_attr_item.h" #include "xfs_health.h" +#include "xfs_attr_leaf.h" struct kmem_cache *xfs_parent_args_cache; @@ -202,8 +203,8 @@ xfs_parent_addname( xfs_inode_to_parent_rec(&ppargs->rec, dp); xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child, child->i_ino, parent_name); - xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_SET); - return 0; + + return xfs_attr_setname(&ppargs->args, 0); } /* Remove a parent pointer to reflect a dirent removal. */ @@ -224,8 +225,8 @@ xfs_parent_removename( xfs_inode_to_parent_rec(&ppargs->rec, dp); xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child, child->i_ino, parent_name); - xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REMOVE); - return 0; + + return xfs_attr_removename(&ppargs->args); } /* Replace one parent pointer with another to reflect a rename. */ @@ -250,12 +251,13 @@ xfs_parent_replacename( child->i_ino, old_name); xfs_inode_to_parent_rec(&ppargs->new_rec, new_dp); + ppargs->args.new_name = new_name->name; ppargs->args.new_namelen = new_name->len; ppargs->args.new_value = &ppargs->new_rec; ppargs->args.new_valuelen = sizeof(struct xfs_parent_rec); - xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REPLACE); - return 0; + + return xfs_attr_replacename(&ppargs->args, 0); } /* diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 2484dc9f6d7e..915ec85530e7 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <darrick.wong@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 06da3ca14727..7e5f92c1ac56 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 83e0488ff773..e78133c908ca 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -3,7 +3,7 @@ * Copyright (c) 2014 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index bf16aee50d73..10b3272238eb 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2014 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 618061d898d4..bc4c0a99f4dd 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c index be16efaa6925..09328f2d1575 100644 --- a/fs/xfs/libxfs/xfs_rtgroup.c +++ b/fs/xfs/libxfs/xfs_rtgroup.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h index 73cace4d25c7..c0b9f9f2c413 100644 --- a/fs/xfs/libxfs/xfs_rtgroup.h +++ b/fs/xfs/libxfs/xfs_rtgroup.h @@ -371,4 +371,19 @@ xfs_rtgs_to_rfsbs( return xfs_groups_to_rfsbs(mp, nr_groups, XG_TYPE_RTG); } +/* + * Return the "raw" size of a group on the hardware device. This includes the + * daddr gaps present for XFS_SB_FEAT_INCOMPAT_ZONE_GAPS file systems. + */ +static inline xfs_rgblock_t +xfs_rtgroup_raw_size( + struct xfs_mount *mp) +{ + struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG]; + + if (g->has_daddr_gaps) + return 1U << g->blklog; + return g->blocks; +} + #endif /* __LIBXFS_RTGROUP_H */ diff --git a/fs/xfs/libxfs/xfs_rtrefcount_btree.c b/fs/xfs/libxfs/xfs_rtrefcount_btree.c index ac11e94b42ae..c1518267eb17 100644 --- a/fs/xfs/libxfs/xfs_rtrefcount_btree.c +++ b/fs/xfs/libxfs/xfs_rtrefcount_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.c b/fs/xfs/libxfs/xfs_rtrmap_btree.c index 55f903165769..00557b7ef298 100644 --- a/fs/xfs/libxfs/xfs_rtrmap_btree.c +++ b/fs/xfs/libxfs/xfs_rtrmap_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 94c272a2ae26..38d16fe1f6d8 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index fb47a76ead18..f9a5966d8048 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -4,7 +4,7 @@ * Copyright (c) 2012-2013 Red Hat, Inc. * All rights reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index c962ad64b0c1..1a0fdcbf39fa 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -3,7 +3,7 @@ * Copyright (c) 2000,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 86a111d0f2fc..3151e97ca8ff 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -4,7 +4,7 @@ * Copyright (C) 2010 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_trans_space.c b/fs/xfs/libxfs/xfs_trans_space.c index b9dc3752f702..9b8f495c9049 100644 --- a/fs/xfs/libxfs/xfs_trans_space.c +++ b/fs/xfs/libxfs/xfs_trans_space.c @@ -3,7 +3,7 @@ * Copyright (c) 2000,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index 1faf04204c5d..67c947a47f14 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -4,7 +4,7 @@ * Copyright (C) 2017 Oracle. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_shared.h" diff --git a/fs/xfs/libxfs/xfs_zones.c b/fs/xfs/libxfs/xfs_zones.c index b40f71f878b5..24e350c31933 100644 --- a/fs/xfs/libxfs/xfs_zones.c +++ b/fs/xfs/libxfs/xfs_zones.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2025 Christoph Hellwig. * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -15,173 +15,102 @@ #include "xfs_zones.h" static bool -xfs_zone_validate_empty( +xfs_validate_blk_zone_seq( + struct xfs_mount *mp, struct blk_zone *zone, - struct xfs_rtgroup *rtg, + unsigned int zone_no, xfs_rgblock_t *write_pointer) { - struct xfs_mount *mp = rtg_mount(rtg); - - if (rtg_rmap(rtg)->i_used_blocks > 0) { - xfs_warn(mp, "empty zone %u has non-zero used counter (0x%x).", - rtg_rgno(rtg), rtg_rmap(rtg)->i_used_blocks); - return false; - } - - *write_pointer = 0; - return true; -} - -static bool -xfs_zone_validate_wp( - struct blk_zone *zone, - struct xfs_rtgroup *rtg, - xfs_rgblock_t *write_pointer) -{ - struct xfs_mount *mp = rtg_mount(rtg); - xfs_rtblock_t wp_fsb = xfs_daddr_to_rtb(mp, zone->wp); - - if (rtg_rmap(rtg)->i_used_blocks > rtg->rtg_extents) { - xfs_warn(mp, "zone %u has too large used counter (0x%x).", - rtg_rgno(rtg), rtg_rmap(rtg)->i_used_blocks); - return false; - } - - if (xfs_rtb_to_rgno(mp, wp_fsb) != rtg_rgno(rtg)) { - xfs_warn(mp, "zone %u write pointer (0x%llx) outside of zone.", - rtg_rgno(rtg), wp_fsb); - return false; - } - - *write_pointer = xfs_rtb_to_rgbno(mp, wp_fsb); - if (*write_pointer >= rtg->rtg_extents) { - xfs_warn(mp, "zone %u has invalid write pointer (0x%x).", - rtg_rgno(rtg), *write_pointer); - return false; - } - - return true; -} - -static bool -xfs_zone_validate_full( - struct blk_zone *zone, - struct xfs_rtgroup *rtg, - xfs_rgblock_t *write_pointer) -{ - struct xfs_mount *mp = 
rtg_mount(rtg); - - if (rtg_rmap(rtg)->i_used_blocks > rtg->rtg_extents) { - xfs_warn(mp, "zone %u has too large used counter (0x%x).", - rtg_rgno(rtg), rtg_rmap(rtg)->i_used_blocks); - return false; - } - - *write_pointer = rtg->rtg_extents; - return true; -} - -static bool -xfs_zone_validate_seq( - struct blk_zone *zone, - struct xfs_rtgroup *rtg, - xfs_rgblock_t *write_pointer) -{ - struct xfs_mount *mp = rtg_mount(rtg); - switch (zone->cond) { case BLK_ZONE_COND_EMPTY: - return xfs_zone_validate_empty(zone, rtg, write_pointer); + *write_pointer = 0; + return true; case BLK_ZONE_COND_IMP_OPEN: case BLK_ZONE_COND_EXP_OPEN: case BLK_ZONE_COND_CLOSED: case BLK_ZONE_COND_ACTIVE: - return xfs_zone_validate_wp(zone, rtg, write_pointer); + if (zone->wp < zone->start || + zone->wp >= zone->start + zone->capacity) { + xfs_warn(mp, + "zone %u write pointer (%llu) outside of zone.", + zone_no, zone->wp); + return false; + } + + *write_pointer = XFS_BB_TO_FSB(mp, zone->wp - zone->start); + return true; case BLK_ZONE_COND_FULL: - return xfs_zone_validate_full(zone, rtg, write_pointer); + *write_pointer = XFS_BB_TO_FSB(mp, zone->capacity); + return true; case BLK_ZONE_COND_NOT_WP: case BLK_ZONE_COND_OFFLINE: case BLK_ZONE_COND_READONLY: xfs_warn(mp, "zone %u has unsupported zone condition 0x%x.", - rtg_rgno(rtg), zone->cond); + zone_no, zone->cond); return false; default: xfs_warn(mp, "zone %u has unknown zone condition 0x%x.", - rtg_rgno(rtg), zone->cond); + zone_no, zone->cond); return false; } } static bool -xfs_zone_validate_conv( +xfs_validate_blk_zone_conv( + struct xfs_mount *mp, struct blk_zone *zone, - struct xfs_rtgroup *rtg) + unsigned int zone_no) { - struct xfs_mount *mp = rtg_mount(rtg); - switch (zone->cond) { case BLK_ZONE_COND_NOT_WP: return true; default: xfs_warn(mp, "conventional zone %u has unsupported zone condition 0x%x.", - rtg_rgno(rtg), zone->cond); + zone_no, zone->cond); return false; } } bool -xfs_zone_validate( +xfs_validate_blk_zone( + struct 
xfs_mount *mp, struct blk_zone *zone, - struct xfs_rtgroup *rtg, + unsigned int zone_no, + uint32_t expected_size, + uint32_t expected_capacity, xfs_rgblock_t *write_pointer) { - struct xfs_mount *mp = rtg_mount(rtg); - struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG]; - uint32_t expected_size; - /* * Check that the zone capacity matches the rtgroup size stored in the * superblock. Note that all zones including the last one must have a * uniform capacity. */ - if (XFS_BB_TO_FSB(mp, zone->capacity) != g->blocks) { + if (XFS_BB_TO_FSB(mp, zone->capacity) != expected_capacity) { xfs_warn(mp, -"zone %u capacity (0x%llx) does not match RT group size (0x%x).", - rtg_rgno(rtg), XFS_BB_TO_FSB(mp, zone->capacity), - g->blocks); +"zone %u capacity (%llu) does not match RT group size (%u).", + zone_no, XFS_BB_TO_FSB(mp, zone->capacity), + expected_capacity); return false; } - if (g->has_daddr_gaps) { - expected_size = 1 << g->blklog; - } else { - if (zone->len != zone->capacity) { - xfs_warn(mp, -"zone %u has capacity != size ((0x%llx vs 0x%llx)", - rtg_rgno(rtg), - XFS_BB_TO_FSB(mp, zone->len), - XFS_BB_TO_FSB(mp, zone->capacity)); - return false; - } - expected_size = g->blocks; - } - if (XFS_BB_TO_FSB(mp, zone->len) != expected_size) { xfs_warn(mp, -"zone %u length (0x%llx) does match geometry (0x%x).", - rtg_rgno(rtg), XFS_BB_TO_FSB(mp, zone->len), +"zone %u length (%llu) does not match geometry (%u).", + zone_no, XFS_BB_TO_FSB(mp, zone->len), expected_size); + return false; } switch (zone->type) { case BLK_ZONE_TYPE_CONVENTIONAL: - return xfs_zone_validate_conv(zone, rtg); + return xfs_validate_blk_zone_conv(mp, zone, zone_no); case BLK_ZONE_TYPE_SEQWRITE_REQ: - return xfs_zone_validate_seq(zone, rtg, write_pointer); + return xfs_validate_blk_zone_seq(mp, zone, zone_no, + write_pointer); default: xfs_warn(mp, "zoned %u has unsupported type 0x%x.", - rtg_rgno(rtg), zone->type); + zone_no, zone->type); return false; } } diff --git a/fs/xfs/libxfs/xfs_zones.h 
b/fs/xfs/libxfs/xfs_zones.h index 5fefd132e002..c16089c9a652 100644 --- a/fs/xfs/libxfs/xfs_zones.h +++ b/fs/xfs/libxfs/xfs_zones.h @@ -3,6 +3,7 @@ #define _LIBXFS_ZONES_H struct xfs_rtgroup; +struct blk_zone; /* * In order to guarantee forward progress for GC we need to reserve at least @@ -36,7 +37,8 @@ struct xfs_rtgroup; */ #define XFS_DEFAULT_MAX_OPEN_ZONES 128 -bool xfs_zone_validate(struct blk_zone *zone, struct xfs_rtgroup *rtg, - xfs_rgblock_t *write_pointer); +bool xfs_validate_blk_zone(struct xfs_mount *mp, struct blk_zone *zone, + unsigned int zone_no, uint32_t expected_size, + uint32_t expected_capacity, xfs_rgblock_t *write_pointer); #endif /* _LIBXFS_ZONES_H */ diff --git a/fs/xfs/scrub/agb_bitmap.c b/fs/xfs/scrub/agb_bitmap.c index 573e4e062754..0194e3aaa1fa 100644 --- a/fs/xfs/scrub/agb_bitmap.c +++ b/fs/xfs/scrub/agb_bitmap.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_bit.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 303374df44bd..7ffe4b0ef0f1 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index cd6f0223879f..15d58eedb387 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -837,8 +837,12 @@ xrep_agi_buf_cleanup( { struct xrep_agi *ragi = buf; - xfarray_destroy(ragi->iunlink_prev); - xfarray_destroy(ragi->iunlink_next); + if (ragi->iunlink_prev) + xfarray_destroy(ragi->iunlink_prev); + ragi->iunlink_prev = NULL; + if (ragi->iunlink_next) + xfarray_destroy(ragi->iunlink_next); + ragi->iunlink_next = NULL; xagino_bitmap_destroy(&ragi->iunlink_bmp); } @@ -1708,7 +1712,6 @@ xrep_agi( { struct xrep_agi *ragi; struct xfs_mount *mp = sc->mp; - char *descr; unsigned int i; int error; @@ -1742,17 +1745,13 @@ xrep_agi( xagino_bitmap_init(&ragi->iunlink_bmp); sc->buf_cleanup = xrep_agi_buf_cleanup; - descr = xchk_xfile_ag_descr(sc, "iunlinked next pointers"); - error = xfarray_create(descr, 0, sizeof(xfs_agino_t), - &ragi->iunlink_next); - kfree(descr); + error = xfarray_create("iunlinked next pointers", 0, + sizeof(xfs_agino_t), &ragi->iunlink_next); if (error) return error; - descr = xchk_xfile_ag_descr(sc, "iunlinked prev pointers"); - error = xfarray_create(descr, 0, sizeof(xfs_agino_t), - &ragi->iunlink_prev); - kfree(descr); + error = xfarray_create("iunlinked prev pointers", 0, + sizeof(xfs_agino_t), &ragi->iunlink_prev); if (error) return error; diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 8b282138097f..48edaa2cb1e0 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/alloc_repair.c b/fs/xfs/scrub/alloc_repair.c index bed6a09aa791..5b4c2a39a155 100644 --- a/fs/xfs/scrub/alloc_repair.c +++ b/fs/xfs/scrub/alloc_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -850,7 +850,6 @@ xrep_allocbt( struct xrep_abt *ra; struct xfs_mount *mp = sc->mp; unsigned int busy_gen; - char *descr; int error; /* We require the rmapbt to rebuild anything. */ @@ -876,11 +875,9 @@ xrep_allocbt( } /* Set up enough storage to handle maximally fragmented free space. */ - descr = xchk_xfile_ag_descr(sc, "free space records"); - error = xfarray_create(descr, mp->m_sb.sb_agblocks / 2, + error = xfarray_create("free space records", mp->m_sb.sb_agblocks / 2, sizeof(struct xfs_alloc_rec_incore), &ra->free_records); - kfree(descr); if (error) goto out_ra; @@ -926,7 +923,22 @@ xrep_revalidate_allocbt( if (error) goto out; + /* + * If the bnobt is still corrupt, we've failed to repair the filesystem + * and should just bail out. + * + * If the bnobt fails cross-examination with the cntbt, the scan will + * free the cntbt cursor, so we need to mark the repair incomplete + * and avoid walking off the end of the NULL cntbt cursor. + */ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + sc->sm->sm_type = XFS_SCRUB_TYPE_CNTBT; + if (!sc->sa.cnt_cur) { + xchk_set_incomplete(sc); + goto out; + } error = xchk_allocbt(sc); out: sc->sm->sm_type = old_type; diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 708334f9b2bd..c3c122ea2d32 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -288,32 +288,6 @@ xchk_xattr_set_map( } /* - * Check the leaf freemap from the usage bitmap. Returns false if the - * attr freemap has problems or points to used space. 
- */ -STATIC bool -xchk_xattr_check_freemap( - struct xfs_scrub *sc, - struct xfs_attr3_icleaf_hdr *leafhdr) -{ - struct xchk_xattr_buf *ab = sc->buf; - unsigned int mapsize = sc->mp->m_attr_geo->blksize; - int i; - - /* Construct bitmap of freemap contents. */ - bitmap_zero(ab->freemap, mapsize); - for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - if (!xchk_xattr_set_map(sc, ab->freemap, - leafhdr->freemap[i].base, - leafhdr->freemap[i].size)) - return false; - } - - /* Look for bits that are set in freemap and are marked in use. */ - return !bitmap_intersects(ab->freemap, ab->usedmap, mapsize); -} - -/* * Check this leaf entry's relations to everything else. * Returns the number of bytes used for the name/value data. */ @@ -364,7 +338,10 @@ xchk_xattr_entry( rentry = xfs_attr3_leaf_name_remote(leaf, idx); namesize = xfs_attr_leaf_entsize_remote(rentry->namelen); name_end = (char *)rentry + namesize; - if (rentry->namelen == 0 || rentry->valueblk == 0) + if (rentry->namelen == 0) + xchk_da_set_corrupt(ds, level); + if (rentry->valueblk == 0 && + !(ent->flags & XFS_ATTR_INCOMPLETE)) xchk_da_set_corrupt(ds, level); } if (name_end > buf_end) @@ -403,6 +380,7 @@ xchk_xattr_block( *last_checked = blk->blkno; bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize); + bitmap_zero(ab->freemap, mp->m_attr_geo->blksize); /* Check all the padding. */ if (xfs_has_crc(ds->sc->mp)) { @@ -449,6 +427,9 @@ xchk_xattr_block( if ((char *)&entries[leafhdr.count] > (char *)leaf + leafhdr.firstused) xchk_da_set_corrupt(ds, level); + if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) { /* Mark the leaf entry itself. */ @@ -467,7 +448,29 @@ xchk_xattr_block( goto out; } - if (!xchk_xattr_check_freemap(ds->sc, &leafhdr)) + /* Construct bitmap of freemap contents. 
*/ + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + if (!xchk_xattr_set_map(ds->sc, ab->freemap, + leafhdr.freemap[i].base, + leafhdr.freemap[i].size)) + xchk_da_set_corrupt(ds, level); + + /* + * freemap entries with zero length and nonzero base can cause + * problems with older kernels, so we mark these for preening + * even though there's no inconsistency. + */ + if (leafhdr.freemap[i].size == 0 && + leafhdr.freemap[i].base > 0) + xchk_da_set_preen(ds, level); + + if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + } + + /* Look for bits that are set in freemap and are marked in use. */ + if (bitmap_intersects(ab->freemap, ab->usedmap, + mp->m_attr_geo->blksize)) xchk_da_set_corrupt(ds, level); if (leafhdr.usedbytes != usedbytes) diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c index 09d63aa10314..a924b467a844 100644 --- a/fs/xfs/scrub/attr_repair.c +++ b/fs/xfs/scrub/attr_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -1516,8 +1516,10 @@ xrep_xattr_teardown( xfblob_destroy(rx->pptr_names); if (rx->pptr_recs) xfarray_destroy(rx->pptr_recs); - xfblob_destroy(rx->xattr_blobs); - xfarray_destroy(rx->xattr_records); + if (rx->xattr_blobs) + xfblob_destroy(rx->xattr_blobs); + if (rx->xattr_records) + xfarray_destroy(rx->xattr_records); mutex_destroy(&rx->lock); kfree(rx); } @@ -1529,7 +1531,6 @@ xrep_xattr_setup_scan( struct xrep_xattr **rxp) { struct xrep_xattr *rx; - char *descr; int max_len; int error; @@ -1555,35 +1556,26 @@ xrep_xattr_setup_scan( goto out_rx; /* Set up some staging for salvaged attribute keys and values */ - descr = xchk_xfile_ino_descr(sc, "xattr keys"); - error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key), + error = xfarray_create("xattr keys", 0, sizeof(struct xrep_xattr_key), &rx->xattr_records); - kfree(descr); if (error) goto out_rx; - descr = xchk_xfile_ino_descr(sc, "xattr names"); - error = xfblob_create(descr, &rx->xattr_blobs); - kfree(descr); + error = xfblob_create("xattr names", &rx->xattr_blobs); if (error) goto out_keys; if (xfs_has_parent(sc->mp)) { ASSERT(sc->flags & XCHK_FSGATES_DIRENTS); - descr = xchk_xfile_ino_descr(sc, - "xattr retained parent pointer entries"); - error = xfarray_create(descr, 0, + error = xfarray_create("xattr parent pointer entries", 0, sizeof(struct xrep_xattr_pptr), &rx->pptr_recs); - kfree(descr); if (error) goto out_values; - descr = xchk_xfile_ino_descr(sc, - "xattr retained parent pointer names"); - error = xfblob_create(descr, &rx->pptr_names); - kfree(descr); + error = xfblob_create("xattr parent pointer names", + &rx->pptr_names); if (error) goto out_pprecs; diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index 7ba35a7a7920..51f3171bc6c8 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. 
All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_bit.h" diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 4f1e2574660d..d40534bf9ef9 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c index 1084213b8e9b..0a83d5845379 100644 --- a/fs/xfs/scrub/bmap_repair.c +++ b/fs/xfs/scrub/bmap_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -923,7 +923,6 @@ xrep_bmap( bool allow_unwritten) { struct xrep_bmap *rb; - char *descr; xfs_extnum_t max_bmbt_recs; bool large_extcount; int error = 0; @@ -945,11 +944,8 @@ xrep_bmap( /* Set up enough storage to handle the max records for this fork. */ large_extcount = xfs_has_large_extent_counts(sc->mp); max_bmbt_recs = xfs_iext_max_nextents(large_extcount, whichfork); - descr = xchk_xfile_ino_descr(sc, "%s fork mapping records", - whichfork == XFS_DATA_FORK ? "data" : "attr"); - error = xfarray_create(descr, max_bmbt_recs, + error = xfarray_create("fork mapping records", max_bmbt_recs, sizeof(struct xfs_bmbt_rec), &rb->bmap_records); - kfree(descr); if (error) goto out_rb; diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index cd6f0ff382a7..1089b1f4c5df 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -42,6 +42,8 @@ __xchk_btree_process_error( break; case -EFSBADCRC: case -EFSCORRUPTED: + case -EIO: + case -ENODATA: /* Note the badness but don't abort. */ sc->sm->sm_flags |= errflag; *error = 0; @@ -370,12 +372,15 @@ xchk_btree_check_block_owner( { xfs_agnumber_t agno; xfs_agblock_t agbno; + bool is_bnobt, is_rmapbt; bool init_sa; int error = 0; if (!bs->cur) return 0; + is_bnobt = xfs_btree_is_bno(bs->cur->bc_ops); + is_rmapbt = xfs_btree_is_rmap(bs->cur->bc_ops); agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr); agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr); @@ -398,11 +403,11 @@ xchk_btree_check_block_owner( * have to nullify it (to shut down further block owner checks) if * self-xref encounters problems. */ - if (!bs->sc->sa.bno_cur && xfs_btree_is_bno(bs->cur->bc_ops)) + if (!bs->sc->sa.bno_cur && is_bnobt) bs->cur = NULL; xchk_xref_is_only_owned_by(bs->sc, agbno, 1, bs->oinfo); - if (!bs->sc->sa.rmap_cur && xfs_btree_is_rmap(bs->cur->bc_ops)) + if (!bs->sc->sa.rmap_cur && is_rmapbt) bs->cur = NULL; out_free: diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 7bfa37c99480..20e63069088b 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -103,6 +103,8 @@ __xchk_process_error( break; case -EFSBADCRC: case -EFSCORRUPTED: + case -EIO: + case -ENODATA: /* Note the badness but don't abort. */ sc->sm->sm_flags |= errflag; *error = 0; @@ -177,6 +179,8 @@ __xchk_fblock_process_error( break; case -EFSBADCRC: case -EFSCORRUPTED: + case -EIO: + case -ENODATA: /* Note the badness but don't abort. 
*/ sc->sm->sm_flags |= errflag; *error = 0; @@ -1395,6 +1399,9 @@ xchk_metadata_inode_subtype( int error; sub = xchk_scrub_create_subord(sc, scrub_type); + if (!sub) + return -ENOMEM; + error = sub->sc.ops->scrub(&sub->sc); xchk_scrub_free_subord(sub); return error; diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index ddbc065c798c..f2ecc68538f0 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -247,31 +247,6 @@ static inline bool xchk_could_repair(const struct xfs_scrub *sc) int xchk_metadata_inode_forks(struct xfs_scrub *sc); /* - * Helper macros to allocate and format xfile description strings. - * Callers must kfree the pointer returned. - */ -#define xchk_xfile_descr(sc, fmt, ...) \ - kasprintf(XCHK_GFP_FLAGS, "XFS (%s): " fmt, \ - (sc)->mp->m_super->s_id, ##__VA_ARGS__) -#define xchk_xfile_ag_descr(sc, fmt, ...) \ - kasprintf(XCHK_GFP_FLAGS, "XFS (%s): AG 0x%x " fmt, \ - (sc)->mp->m_super->s_id, \ - (sc)->sa.pag ? \ - pag_agno((sc)->sa.pag) : (sc)->sm->sm_agno, \ - ##__VA_ARGS__) -#define xchk_xfile_ino_descr(sc, fmt, ...) \ - kasprintf(XCHK_GFP_FLAGS, "XFS (%s): inode 0x%llx " fmt, \ - (sc)->mp->m_super->s_id, \ - (sc)->ip ? (sc)->ip->i_ino : (sc)->sm->sm_ino, \ - ##__VA_ARGS__) -#define xchk_xfile_rtgroup_descr(sc, fmt, ...) \ - kasprintf(XCHK_GFP_FLAGS, "XFS (%s): rtgroup 0x%x " fmt, \ - (sc)->mp->m_super->s_id, \ - (sc)->sa.pag ? \ - rtg_rgno((sc)->sr.rtg) : (sc)->sm->sm_agno, \ - ##__VA_ARGS__) - -/* * Setting up a hook to wait for intents to drain is costly -- we have to take * the CPU hotplug lock and force an i-cache flush on all CPUs once to set it * up, and again to tear it down. These costs add up quickly, so we only want diff --git a/fs/xfs/scrub/cow_repair.c b/fs/xfs/scrub/cow_repair.c index b2a83801412e..33749cf43520 100644 --- a/fs/xfs/scrub/cow_repair.c +++ b/fs/xfs/scrub/cow_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2022-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 056de4819f86..5858d4d5e279 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -45,6 +45,8 @@ xchk_da_process_error( break; case -EFSBADCRC: case -EFSCORRUPTED: + case -EIO: + case -ENODATA: /* Note the badness but don't abort. */ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; *error = 0; diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index c877bde71e62..91228623d016 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -1102,22 +1102,17 @@ xchk_directory( sd->xname.name = sd->namebuf; if (xfs_has_parent(sc->mp)) { - char *descr; - /* * Set up some staging memory for dirents that we can't check * due to locking contention. 
*/ - descr = xchk_xfile_ino_descr(sc, "slow directory entries"); - error = xfarray_create(descr, 0, sizeof(struct xchk_dirent), - &sd->dir_entries); - kfree(descr); + error = xfarray_create("slow directory entries", 0, + sizeof(struct xchk_dirent), &sd->dir_entries); if (error) goto out_sd; - descr = xchk_xfile_ino_descr(sc, "slow directory entry names"); - error = xfblob_create(descr, &sd->dir_names); - kfree(descr); + error = xfblob_create("slow directory entry names", + &sd->dir_names); if (error) goto out_entries; } diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 8d3b550990b5..f105e49f654b 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -172,8 +172,12 @@ xrep_dir_teardown( struct xrep_dir *rd = sc->buf; xrep_findparent_scan_teardown(&rd->pscan); - xfblob_destroy(rd->dir_names); - xfarray_destroy(rd->dir_entries); + if (rd->dir_names) + xfblob_destroy(rd->dir_names); + rd->dir_names = NULL; + if (rd->dir_entries) + xfarray_destroy(rd->dir_entries); + rd->dir_entries = NULL; } /* Set up for a directory repair. */ @@ -1784,20 +1788,15 @@ xrep_dir_setup_scan( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; - char *descr; int error; /* Set up some staging memory for salvaging dirents.
*/ - descr = xchk_xfile_ino_descr(sc, "directory entries"); - error = xfarray_create(descr, 0, sizeof(struct xrep_dirent), - &rd->dir_entries); - kfree(descr); + error = xfarray_create("directory entries", 0, + sizeof(struct xrep_dirent), &rd->dir_entries); if (error) return error; - descr = xchk_xfile_ino_descr(sc, "directory entry names"); - error = xfblob_create(descr, &rd->dir_names); - kfree(descr); + error = xfblob_create("directory entry names", &rd->dir_names); if (error) goto out_xfarray; diff --git a/fs/xfs/scrub/dirtree.c b/fs/xfs/scrub/dirtree.c index 3a9cdf8738b6..e95dc74f1145 100644 --- a/fs/xfs/scrub/dirtree.c +++ b/fs/xfs/scrub/dirtree.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -81,8 +81,12 @@ xchk_dirtree_buf_cleanup( kfree(path); } - xfblob_destroy(dl->path_names); - xfarray_destroy(dl->path_steps); + if (dl->path_names) + xfblob_destroy(dl->path_names); + dl->path_names = NULL; + if (dl->path_steps) + xfarray_destroy(dl->path_steps); + dl->path_steps = NULL; mutex_destroy(&dl->lock); } @@ -92,7 +96,6 @@ xchk_setup_dirtree( struct xfs_scrub *sc) { struct xchk_dirtree *dl; - char *descr; int error; xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); @@ -116,16 +119,12 @@ xchk_setup_dirtree( mutex_init(&dl->lock); - descr = xchk_xfile_ino_descr(sc, "dirtree path steps"); - error = xfarray_create(descr, 0, sizeof(struct xchk_dirpath_step), - &dl->path_steps); - kfree(descr); + error = xfarray_create("dirtree path steps", 0, + sizeof(struct xchk_dirpath_step), &dl->path_steps); if (error) goto out_dl; - descr = xchk_xfile_ino_descr(sc, "dirtree path names"); - error = xfblob_create(descr, &dl->path_names); - kfree(descr); + error = xfblob_create("dirtree path names", &dl->path_names); if (error) goto out_steps; diff --git a/fs/xfs/scrub/dirtree_repair.c 
b/fs/xfs/scrub/dirtree_repair.c index 5c04e70ba951..019feaf0d606 100644 --- a/fs/xfs/scrub/dirtree_repair.c +++ b/fs/xfs/scrub/dirtree_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/dqiterate.c b/fs/xfs/scrub/dqiterate.c index 20c4daedd48d..10950e4bd4c3 100644 --- a/fs/xfs/scrub/dqiterate.c +++ b/fs/xfs/scrub/dqiterate.c @@ -3,7 +3,7 @@ * Copyright (C) 2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_bit.h" diff --git a/fs/xfs/scrub/findparent.c b/fs/xfs/scrub/findparent.c index 84487072b6dd..2076f028d271 100644 --- a/fs/xfs/scrub/findparent.c +++ b/fs/xfs/scrub/findparent.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index cebd0d526926..b35f65b537ba 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -3,7 +3,7 @@ * Copyright (C) 2019-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/fscounters_repair.c b/fs/xfs/scrub/fscounters_repair.c index f0d2b04644e4..783e409f8f3c 100644 --- a/fs/xfs/scrub/fscounters_repair.c +++ b/fs/xfs/scrub/fscounters_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index 3c0f25098b69..2171bcf0f6c1 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -3,7 +3,7 @@ * Copyright (C) 2019-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 4dc7c83dc08a..911dc0f9a79d 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/ialloc_repair.c b/fs/xfs/scrub/ialloc_repair.c index 14e48d3f1912..9b63b9d19e1b 100644 --- a/fs/xfs/scrub/ialloc_repair.c +++ b/fs/xfs/scrub/ialloc_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -797,7 +797,6 @@ xrep_iallocbt( { struct xrep_ibt *ri; struct xfs_mount *mp = sc->mp; - char *descr; xfs_agino_t first_agino, last_agino; int error = 0; @@ -816,11 +815,9 @@ xrep_iallocbt( /* Set up enough storage to handle an AG with nothing but inodes. 
*/ xfs_agino_range(mp, pag_agno(sc->sa.pag), &first_agino, &last_agino); last_agino /= XFS_INODES_PER_CHUNK; - descr = xchk_xfile_ag_descr(sc, "inode index records"); - error = xfarray_create(descr, last_agino, + error = xfarray_create("inode index records", last_agino, sizeof(struct xfs_inobt_rec_incore), &ri->inode_records); - kfree(descr); if (error) goto out_ri; @@ -866,10 +863,24 @@ xrep_revalidate_iallocbt( if (error) goto out; - if (xfs_has_finobt(sc->mp)) { - sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT; - error = xchk_iallocbt(sc); + /* + * If the inobt is still corrupt, we've failed to repair the filesystem + * and should just bail out. + * + * If the inobt fails cross-examination with the finobt, the scan will + * free the finobt cursor, so we need to mark the repair incomplete + * and avoid walking off the end of the NULL finobt cursor. + */ + if (!xfs_has_finobt(sc->mp) || + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + goto out; + + sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT; + if (!sc->sa.fino_cur) { + xchk_set_incomplete(sc); + goto out; } + error = xchk_iallocbt(sc); out: sc->sm->sm_type = old_type; diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index bb3f475b6353..948d04dcba2a 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 4f7040c9ddf0..bf182a18f115 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/iscan.c b/fs/xfs/scrub/iscan.c index 84f117667ca2..2a974eed00cc 100644 --- a/fs/xfs/scrub/iscan.c +++ b/fs/xfs/scrub/iscan.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/listxattr.c b/fs/xfs/scrub/listxattr.c index 256ff7700c94..0863db64b1b2 100644 --- a/fs/xfs/scrub/listxattr.c +++ b/fs/xfs/scrub/listxattr.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c index 378ec7c8d38e..3d9de59c1758 100644 --- a/fs/xfs/scrub/metapath.c +++ b/fs/xfs/scrub/metapath.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c index 951ae8b71566..43e868f829aa 100644 --- a/fs/xfs/scrub/newbt.c +++ b/fs/xfs/scrub/newbt.c @@ -3,7 +3,7 @@ * Copyright (C) 2022-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c index 091c79e432e5..e80fe7395d78 100644 --- a/fs/xfs/scrub/nlinks.c +++ b/fs/xfs/scrub/nlinks.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -971,7 +971,8 @@ xchk_nlinks_teardown_scan( xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook); - xfarray_destroy(xnc->nlinks); + if (xnc->nlinks) + xfarray_destroy(xnc->nlinks); xnc->nlinks = NULL; xchk_iscan_teardown(&xnc->collect_iscan); @@ -990,7 +991,6 @@ xchk_nlinks_setup_scan( struct xchk_nlink_ctrs *xnc) { struct xfs_mount *mp = sc->mp; - char *descr; unsigned long long max_inos; xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1; xfs_agino_t first_agino, last_agino; @@ -1007,10 +1007,9 @@ xchk_nlinks_setup_scan( */ xfs_agino_range(mp, last_agno, &first_agino, &last_agino); max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1; - descr = xchk_xfile_descr(sc, "file link counts"); - error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos), + error = xfarray_create("file link counts", + min(XFS_MAXINUMBER + 1, max_inos), sizeof(struct xchk_nlink), &xnc->nlinks); - kfree(descr); if (error) goto out_teardown; diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c index 6ef2ee9c3814..9049215c6eae 100644 --- a/fs/xfs/scrub/nlinks_repair.c +++ b/fs/xfs/scrub/nlinks_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 4e550a1d5353..52a108f6d5f4 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 11d5de10fd56..5a259570b154 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -755,7 +755,6 @@ xchk_parent_pptr( struct xfs_scrub *sc) { struct xchk_pptrs *pp; - char *descr; int error; pp = kvzalloc(sizeof(struct xchk_pptrs), XCHK_GFP_FLAGS); @@ -768,16 +767,12 @@ xchk_parent_pptr( * Set up some staging memory for parent pointers that we can't check * due to locking contention. */ - descr = xchk_xfile_ino_descr(sc, "slow parent pointer entries"); - error = xfarray_create(descr, 0, sizeof(struct xchk_pptr), - &pp->pptr_entries); - kfree(descr); + error = xfarray_create("slow parent pointer entries", 0, + sizeof(struct xchk_pptr), &pp->pptr_entries); if (error) goto out_pp; - descr = xchk_xfile_ino_descr(sc, "slow parent pointer names"); - error = xfblob_create(descr, &pp->pptr_names); - kfree(descr); + error = xfblob_create("slow parent pointer names", &pp->pptr_names); if (error) goto out_entries; diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c index 2949feda6271..83a8205ae2f1 100644 --- a/fs/xfs/scrub/parent_repair.c +++ b/fs/xfs/scrub/parent_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -1497,7 +1497,6 @@ xrep_parent_setup_scan( struct xrep_parent *rp) { struct xfs_scrub *sc = rp->sc; - char *descr; struct xfs_da_geometry *geo = sc->mp->m_attr_geo; int max_len; int error; @@ -1525,32 +1524,22 @@ xrep_parent_setup_scan( goto out_xattr_name; /* Set up some staging memory for logging parent pointer updates. */ - descr = xchk_xfile_ino_descr(sc, "parent pointer entries"); - error = xfarray_create(descr, 0, sizeof(struct xrep_pptr), - &rp->pptr_recs); - kfree(descr); + error = xfarray_create("parent pointer entries", 0, + sizeof(struct xrep_pptr), &rp->pptr_recs); if (error) goto out_xattr_value; - descr = xchk_xfile_ino_descr(sc, "parent pointer names"); - error = xfblob_create(descr, &rp->pptr_names); - kfree(descr); + error = xfblob_create("parent pointer names", &rp->pptr_names); if (error) goto out_recs; /* Set up some storage for copying attrs before the mapping exchange */ - descr = xchk_xfile_ino_descr(sc, - "parent pointer retained xattr entries"); - error = xfarray_create(descr, 0, sizeof(struct xrep_parent_xattr), - &rp->xattr_records); - kfree(descr); + error = xfarray_create("parent pointer xattr entries", 0, + sizeof(struct xrep_parent_xattr), &rp->xattr_records); if (error) goto out_names; - descr = xchk_xfile_ino_descr(sc, - "parent pointer retained xattr values"); - error = xfblob_create(descr, &rp->xattr_blobs); - kfree(descr); + error = xfblob_create("parent pointer xattr values", &rp->xattr_blobs); if (error) goto out_attr_keys; diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 5c5374c44c5a..1d25bd5b892e 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_bit.h" diff --git a/fs/xfs/scrub/quota_repair.c b/fs/xfs/scrub/quota_repair.c index b1d661aa5f06..487bd4f68ebb 100644 --- a/fs/xfs/scrub/quota_repair.c +++ b/fs/xfs/scrub/quota_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c index d412a8359784..e8cba19334a0 100644 --- a/fs/xfs/scrub/quotacheck.c +++ b/fs/xfs/scrub/quotacheck.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -741,7 +741,6 @@ xqcheck_setup_scan( struct xfs_scrub *sc, struct xqcheck *xqc) { - char *descr; struct xfs_quotainfo *qi = sc->mp->m_quotainfo; unsigned long long max_dquots = XFS_DQ_ID_MAX + 1ULL; int error; @@ -756,28 +755,22 @@ xqcheck_setup_scan( error = -ENOMEM; if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_USER)) { - descr = xchk_xfile_descr(sc, "user dquot records"); - error = xfarray_create(descr, max_dquots, + error = xfarray_create("user dquot records", max_dquots, sizeof(struct xqcheck_dquot), &xqc->ucounts); - kfree(descr); if (error) goto out_teardown; } if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_GROUP)) { - descr = xchk_xfile_descr(sc, "group dquot records"); - error = xfarray_create(descr, max_dquots, + error = xfarray_create("group dquot records", max_dquots, sizeof(struct xqcheck_dquot), &xqc->gcounts); - kfree(descr); if (error) goto out_teardown; } if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_PROJ)) { - descr = xchk_xfile_descr(sc, "project dquot records"); - error = xfarray_create(descr, max_dquots, + error = 
xfarray_create("project dquot records", max_dquots, sizeof(struct xqcheck_dquot), &xqc->pcounts); - kfree(descr); if (error) goto out_teardown; } diff --git a/fs/xfs/scrub/quotacheck_repair.c b/fs/xfs/scrub/quotacheck_repair.c index 51be8d8d261b..dbb522e1513b 100644 --- a/fs/xfs/scrub/quotacheck_repair.c +++ b/fs/xfs/scrub/quotacheck_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rcbag.c b/fs/xfs/scrub/rcbag.c index e1e52bc20713..c1a97a073d92 100644 --- a/fs/xfs/scrub/rcbag.c +++ b/fs/xfs/scrub/rcbag.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rcbag_btree.c b/fs/xfs/scrub/rcbag_btree.c index 9a4ef823c5a7..367f8ccf55c4 100644 --- a/fs/xfs/scrub/rcbag_btree.c +++ b/fs/xfs/scrub/rcbag_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/readdir.c b/fs/xfs/scrub/readdir.c index 01c9a2dc0f2c..c66ec9093a38 100644 --- a/fs/xfs/scrub/readdir.c +++ b/fs/xfs/scrub/readdir.c @@ -3,7 +3,7 @@ * Copyright (C) 2022-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c index 07f5bb8a6421..fff23932828b 100644 --- a/fs/xfs/scrub/reap.c +++ b/fs/xfs/scrub/reap.c @@ -3,7 +3,7 @@ * Copyright (C) 2022-2023 Oracle. All Rights Reserved. 
* Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index d46528023015..bf87025f24fc 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/refcount_repair.c b/fs/xfs/scrub/refcount_repair.c index 9c8cb5332da0..507993e0fb0f 100644 --- a/fs/xfs/scrub/refcount_repair.c +++ b/fs/xfs/scrub/refcount_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -123,13 +123,7 @@ int xrep_setup_ag_refcountbt( struct xfs_scrub *sc) { - char *descr; - int error; - - descr = xchk_xfile_ag_descr(sc, "rmap record bag"); - error = xrep_setup_xfbtree(sc, descr); - kfree(descr); - return error; + return xrep_setup_xfbtree(sc, "rmap record bag"); } /* Check for any obvious conflicts with this shared/CoW staging extent. */ @@ -704,7 +698,6 @@ xrep_refcountbt( { struct xrep_refc *rr; struct xfs_mount *mp = sc->mp; - char *descr; int error; /* We require the rmapbt to rebuild anything. */ @@ -717,11 +710,9 @@ xrep_refcountbt( rr->sc = sc; /* Set up enough storage to handle one refcount record per block. 
*/ - descr = xchk_xfile_ag_descr(sc, "reference count records"); - error = xfarray_create(descr, mp->m_sb.sb_agblocks, + error = xfarray_create("reference count records", mp->m_sb.sb_agblocks, sizeof(struct xfs_refcount_irec), &rr->refcount_records); - kfree(descr); if (error) goto out_rr; diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index efd5a7ccdf62..ac8c592579bd 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -1136,6 +1136,9 @@ xrep_metadata_inode_subtype( * setup/teardown routines. */ sub = xchk_scrub_create_subord(sc, scrub_type); + if (!sub) + return -ENOMEM; + error = sub->sc.ops->scrub(&sub->sc); if (error) goto out; diff --git a/fs/xfs/scrub/rgsuper.c b/fs/xfs/scrub/rgsuper.c index d189732d0e24..482f899a518a 100644 --- a/fs/xfs/scrub/rgsuper.c +++ b/fs/xfs/scrub/rgsuper.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index 39e9ad7cd8ae..2c25910e2903 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c index 17d4a38d735c..ab7053e25e1c 100644 --- a/fs/xfs/scrub/rmap_repair.c +++ b/fs/xfs/scrub/rmap_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -164,14 +164,11 @@ xrep_setup_ag_rmapbt( struct xfs_scrub *sc) { struct xrep_rmap *rr; - char *descr; int error; xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP); - descr = xchk_xfile_ag_descr(sc, "reverse mapping records"); - error = xrep_setup_xfbtree(sc, descr); - kfree(descr); + error = xrep_setup_xfbtree(sc, "reverse mapping records"); if (error) return error; diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index d5ff8609dbfb..4bcfd99cec17 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rtbitmap_repair.c b/fs/xfs/scrub/rtbitmap_repair.c index 203a1a97c502..f4ca86a2ea1b 100644 --- a/fs/xfs/scrub/rtbitmap_repair.c +++ b/fs/xfs/scrub/rtbitmap_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2020-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -43,7 +43,6 @@ xrep_setup_rtbitmap( struct xchk_rtbitmap *rtb) { struct xfs_mount *mp = sc->mp; - char *descr; unsigned long long blocks = mp->m_sb.sb_rbmblocks; int error; @@ -52,9 +51,8 @@ xrep_setup_rtbitmap( return error; /* Create an xfile to hold our reconstructed bitmap. 
*/ - descr = xchk_xfile_rtgroup_descr(sc, "bitmap file"); - error = xfile_create(descr, blocks * mp->m_sb.sb_blocksize, &sc->xfile); - kfree(descr); + error = xfile_create("realtime bitmap file", + blocks * mp->m_sb.sb_blocksize, &sc->xfile); if (error) return error; diff --git a/fs/xfs/scrub/rtrefcount.c b/fs/xfs/scrub/rtrefcount.c index 4c5dffc73641..8cfe2f120b6b 100644 --- a/fs/xfs/scrub/rtrefcount.c +++ b/fs/xfs/scrub/rtrefcount.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rtrefcount_repair.c b/fs/xfs/scrub/rtrefcount_repair.c index 983362447826..f713daf095fb 100644 --- a/fs/xfs/scrub/rtrefcount_repair.c +++ b/fs/xfs/scrub/rtrefcount_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -128,13 +128,7 @@ int xrep_setup_rtrefcountbt( struct xfs_scrub *sc) { - char *descr; - int error; - - descr = xchk_xfile_ag_descr(sc, "rmap record bag"); - error = xrep_setup_xfbtree(sc, descr); - kfree(descr); - return error; + return xrep_setup_xfbtree(sc, "realtime rmap record bag"); } /* Check for any obvious conflicts with this shared/CoW staging extent. */ @@ -704,7 +698,6 @@ xrep_rtrefcountbt( { struct xrep_rtrefc *rr; struct xfs_mount *mp = sc->mp; - char *descr; int error; /* We require the rmapbt to rebuild anything. */ @@ -722,11 +715,9 @@ xrep_rtrefcountbt( rr->sc = sc; /* Set up enough storage to handle one refcount record per rt extent. 
*/ - descr = xchk_xfile_ag_descr(sc, "reference count records"); - error = xfarray_create(descr, mp->m_sb.sb_rextents, - sizeof(struct xfs_refcount_irec), + error = xfarray_create("realtime reference count records", + mp->m_sb.sb_rextents, sizeof(struct xfs_refcount_irec), &rr->refcount_records); - kfree(descr); if (error) goto out_rr; diff --git a/fs/xfs/scrub/rtrmap.c b/fs/xfs/scrub/rtrmap.c index 12989fe80e8b..8b1a8389d32f 100644 --- a/fs/xfs/scrub/rtrmap.c +++ b/fs/xfs/scrub/rtrmap.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rtrmap_repair.c b/fs/xfs/scrub/rtrmap_repair.c index 7561941a337a..4610d6d80648 100644 --- a/fs/xfs/scrub/rtrmap_repair.c +++ b/fs/xfs/scrub/rtrmap_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -103,14 +103,11 @@ xrep_setup_rtrmapbt( struct xfs_scrub *sc) { struct xrep_rtrmap *rr; - char *descr; int error; xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP); - descr = xchk_xfile_rtgroup_descr(sc, "reverse mapping records"); - error = xrep_setup_xfbtree(sc, descr); - kfree(descr); + error = xrep_setup_xfbtree(sc, "realtime reverse mapping records"); if (error) return error; diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index 4ac679c1bd29..b510e6bbbd3e 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -43,7 +43,6 @@ xchk_setup_rtsummary( struct xfs_scrub *sc) { struct xfs_mount *mp = sc->mp; - char *descr; struct xchk_rtsummary *rts; int error; @@ -70,10 +69,8 @@ xchk_setup_rtsummary( * Create an xfile to construct a new rtsummary file. The xfile allows * us to avoid pinning kernel memory for this purpose. */ - descr = xchk_xfile_descr(sc, "realtime summary file"); - error = xfile_create(descr, XFS_FSB_TO_B(mp, mp->m_rsumblocks), - &sc->xfile); - kfree(descr); + error = xfile_create("realtime summary file", + XFS_FSB_TO_B(mp, mp->m_rsumblocks), &sc->xfile); if (error) return error; diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c index d593977d70df..afffbd6e0ad1 100644 --- a/fs/xfs/scrub/rtsummary_repair.c +++ b/fs/xfs/scrub/rtsummary_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 3c3b0d25006f..c1c6415f5055 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -634,7 +634,7 @@ xchk_scrub_create_subord( sub = kzalloc(sizeof(*sub), XCHK_GFP_FLAGS); if (!sub) - return ERR_PTR(-ENOMEM); + return NULL; sub->old_smtype = sc->sm->sm_type; sub->old_smflags = sc->sm->sm_flags; diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c index f8a37ea97791..4efafc5ae966 100644 --- a/fs/xfs/scrub/stats.c +++ b/fs/xfs/scrub/stats.c @@ -3,7 +3,7 @@ * Copyright (C) 2023 Oracle. All Rights Reserved. 
* Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c index c848bcc07cd5..91d40b9fb5c6 100644 --- a/fs/xfs/scrub/symlink.c +++ b/fs/xfs/scrub/symlink.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/symlink_repair.c b/fs/xfs/scrub/symlink_repair.c index df629892462f..25416dfb5189 100644 --- a/fs/xfs/scrub/symlink_repair.c +++ b/fs/xfs/scrub/symlink_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index cf99e0ca51b0..8d754df72aa5 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 987313a52e64..70d353287993 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/xfarray.c b/fs/xfs/scrub/xfarray.c index ed2e8c64b1a8..c7c4a71b6fa7 100644 --- a/fs/xfs/scrub/xfarray.c +++ b/fs/xfs/scrub/xfarray.c @@ -3,7 +3,7 @@ * Copyright (C) 2021-2023 Oracle. All Rights Reserved. 
* Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/xfblob.c b/fs/xfs/scrub/xfblob.c index 6ef2a9637f16..96fc360312de 100644 --- a/fs/xfs/scrub/xfblob.c +++ b/fs/xfs/scrub/xfblob.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c index c753c79df203..2998c9b62f4b 100644 --- a/fs/xfs/scrub/xfile.c +++ b/fs/xfs/scrub/xfile.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h deleted file mode 100644 index 9355ccad9503..000000000000 --- a/fs/xfs/xfs.h +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - */ -#ifndef __XFS_H__ -#define __XFS_H__ - -#ifdef CONFIG_XFS_DEBUG -#define DEBUG 1 -#endif - -#ifdef CONFIG_XFS_DEBUG_EXPENSIVE -#define DEBUG_EXPENSIVE 1 -#endif - -#ifdef CONFIG_XFS_ASSERT_FATAL -#define XFS_ASSERT_FATAL 1 -#endif - -#ifdef CONFIG_XFS_WARN -#define XFS_WARN 1 -#endif - - -#include "xfs_linux.h" - -#endif /* __XFS_H__ */ diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index c7c3dcfa2718..fdfca6fc75b6 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -3,7 +3,7 @@ * Copyright (c) 2008, Christoph Hellwig * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 56a544638491..043ab12a18ea 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -4,7 +4,7 @@ * Copyright (c) 2016-2025 Christoph Hellwig. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 319004bf089f..92331991f9fd 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index e8fa326ac995..354472bf45f1 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -4,7 +4,7 @@ * Author: Allison Henderson <allison.henderson@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_trans_resv.h" @@ -192,10 +192,9 @@ xfs_attri_item_size( STATIC void xfs_attri_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; struct xfs_attri_log_nameval *nv = attrip->attri_nameval; attrip->attri_format.alfi_type = XFS_LI_ATTRI; @@ -220,24 +219,23 @@ xfs_attri_item_format( if (nv->new_value.iov_len > 0) attrip->attri_format.alfi_size++; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRI_FORMAT, - &attrip->attri_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_ATTRI_FORMAT, &attrip->attri_format, sizeof(struct xfs_attri_log_format)); - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NAME, nv->name.iov_base, + xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_NAME, nv->name.iov_base, 
nv->name.iov_len); if (nv->new_name.iov_len > 0) - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NEWNAME, - nv->new_name.iov_base, nv->new_name.iov_len); + xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_NEWNAME, + nv->new_name.iov_base, nv->new_name.iov_len); if (nv->value.iov_len > 0) - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_VALUE, - nv->value.iov_base, nv->value.iov_len); + xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_VALUE, + nv->value.iov_base, nv->value.iov_len); if (nv->new_value.iov_len > 0) - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NEWVALUE, - nv->new_value.iov_base, nv->new_value.iov_len); + xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_NEWVALUE, + nv->new_value.iov_base, nv->new_value.iov_len); } /* @@ -322,16 +320,15 @@ xfs_attrd_item_size( */ STATIC void xfs_attrd_item_format( - struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xfs_log_item *lip, + struct xlog_format_buf *lfb) { struct xfs_attrd_log_item *attrdp = ATTRD_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; attrdp->attrd_format.alfd_type = XFS_LI_ATTRD; attrdp->attrd_format.alfd_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRD_FORMAT, + xlog_format_copy(lfb, XLOG_REG_TYPE_ATTRD_FORMAT, &attrdp->attrd_format, sizeof(struct xfs_attrd_log_format)); } diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 379b48d015d2..114566b1ae5c 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c index 2a736d10eafb..b87e7975b613 100644 --- a/fs/xfs/xfs_bio_io.c +++ b/fs/xfs/xfs_bio_io.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2019 Christoph Hellwig. 
*/ -#include "xfs.h" +#include "xfs_platform.h" static inline unsigned int bio_max_vecs(unsigned int count) { diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 80f0c4bcc483..e8775f254c89 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" @@ -92,10 +92,9 @@ unsigned int xfs_bui_log_space(unsigned int nr) STATIC void xfs_bui_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_bui_log_item *buip = BUI_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; ASSERT(atomic_read(&buip->bui_next_extent) == buip->bui_format.bui_nextents); @@ -103,7 +102,7 @@ xfs_bui_item_format( buip->bui_format.bui_type = XFS_LI_BUI; buip->bui_format.bui_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_BUI_FORMAT, &buip->bui_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_BUI_FORMAT, &buip->bui_format, xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents)); } @@ -188,15 +187,14 @@ unsigned int xfs_bud_log_space(void) STATIC void xfs_bud_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_bud_log_item *budp = BUD_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; budp->bud_format.bud_type = XFS_LI_BUD; budp->bud_format.bud_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_BUD_FORMAT, &budp->bud_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_BUD_FORMAT, &budp->bud_format, sizeof(struct xfs_bud_log_format)); } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 2208a720ec3f..0ab00615f1ad 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -4,7 +4,7 @@ * Copyright (c) 2012 Red Hat, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 47edf3041631..db46883991de 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include <linux/backing-dev.h> #include <linux/dax.h> diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index f4c5be67826e..8487635579e5 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -263,24 +263,21 @@ xfs_buf_item_size( static inline void xfs_buf_item_copy_iovec( - struct xfs_log_vec *lv, - struct xfs_log_iovec **vecp, + struct xlog_format_buf *lfb, struct xfs_buf *bp, uint offset, int first_bit, uint nbits) { offset += first_bit * XFS_BLF_CHUNK; - xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BCHUNK, - xfs_buf_offset(bp, offset), + xlog_format_copy(lfb, XLOG_REG_TYPE_BCHUNK, xfs_buf_offset(bp, offset), nbits * XFS_BLF_CHUNK); } static void xfs_buf_item_format_segment( struct xfs_buf_log_item *bip, - struct xfs_log_vec *lv, - struct xfs_log_iovec **vecp, + struct xlog_format_buf *lfb, uint offset, struct xfs_buf_log_format *blfp) { @@ -308,7 +305,7 @@ xfs_buf_item_format_segment( return; } - blfp = xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size); + blfp = xlog_format_copy(lfb, XLOG_REG_TYPE_BFORMAT, blfp, base_size); blfp->blf_size = 1; if (bip->bli_flags & XFS_BLI_STALE) { @@ -331,8 +328,7 @@ xfs_buf_item_format_segment( nbits = xfs_contig_bits(blfp->blf_data_map, blfp->blf_map_size, first_bit); ASSERT(nbits > 0); - xfs_buf_item_copy_iovec(lv, vecp, bp, offset, - first_bit, nbits); + xfs_buf_item_copy_iovec(lfb, bp, offset, first_bit, nbits); 
blfp->blf_size++; /* @@ -357,11 +353,10 @@ xfs_buf_item_format_segment( STATIC void xfs_buf_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; - struct xfs_log_iovec *vecp = NULL; uint offset = 0; int i; @@ -398,7 +393,7 @@ xfs_buf_item_format( } for (i = 0; i < bip->bli_format_count; i++) { - xfs_buf_item_format_segment(bip, lv, &vecp, offset, + xfs_buf_item_format_segment(bip, lfb, offset, &bip->bli_formats[i]); offset += BBTOB(bp->b_maps[i].bm_len); } diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index e4c8af873632..77ad071ebe78 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c index dcbfa274e06d..0106da0a9f44 100644 --- a/fs/xfs/xfs_buf_mem.c +++ b/fs/xfs/xfs_buf_mem.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_buf.h" #include "xfs_buf_mem.h" diff --git a/fs/xfs/xfs_dahash_test.c b/fs/xfs/xfs_dahash_test.c index 0dab5941e080..f1ee2643b948 100644 --- a/fs/xfs/xfs_dahash_test.c +++ b/fs/xfs/xfs_dahash_test.c @@ -3,7 +3,7 @@ * Copyright (C) 2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 06ac5a7de60a..60a80d4173f7 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index b6ffe4807a11..31477a74b523 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -3,7 +3,7 @@ * Copyright (C) 2010, 2023 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 612ca682a513..2b208e2c5264 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2003 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index b374cd9f1900..491e2a7053a3 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2003 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -44,25 +44,24 @@ xfs_qm_dquot_logitem_size( STATIC void xfs_qm_dquot_logitem_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_disk_dquot ddq; struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; struct xfs_dq_logformat *qlf; - qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QFORMAT); + qlf = xlog_format_start(lfb, XLOG_REG_TYPE_QFORMAT); qlf->qlf_type = XFS_LI_DQUOT; qlf->qlf_size = 2; qlf->qlf_id = qlip->qli_dquot->q_id; qlf->qlf_blkno = qlip->qli_dquot->q_blkno; qlf->qlf_len = 1; qlf->qlf_boffset = qlip->qli_dquot->q_bufoffset; - xlog_finish_iovec(lv, vecp, sizeof(struct xfs_dq_logformat)); + xlog_format_commit(lfb, sizeof(struct xfs_dq_logformat)); xfs_dquot_to_disk(&ddq, qlip->qli_dquot); - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_DQUOT, &ddq, + xlog_format_copy(lfb, XLOG_REG_TYPE_DQUOT, &ddq, sizeof(struct xfs_disk_dquot)); } diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c index 89bc9bcaf51e..fe419b28de22 100644 --- a/fs/xfs/xfs_dquot_item_recover.c +++ b/fs/xfs/xfs_dquot_item_recover.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_drain.c b/fs/xfs/xfs_drain.c index fa5f31931efd..1ad67f6c1fbf 100644 --- a/fs/xfs/xfs_drain.c +++ b/fs/xfs/xfs_drain.c @@ -3,7 +3,7 @@ * Copyright (C) 2022-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 39830b252ac8..d652240a1dca 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_fs.h" @@ -22,6 +22,12 @@ static const unsigned int xfs_errortag_random_default[] = { XFS_ERRTAGS }; #undef XFS_ERRTAG +#define XFS_ERRTAG(_tag, _name, _default) \ + [XFS_ERRTAG_##_tag] = __stringify(_name), +#include "xfs_errortag.h" +static const char *xfs_errortag_names[] = { XFS_ERRTAGS }; +#undef XFS_ERRTAG + struct xfs_errortag_attr { struct attribute attr; unsigned int tag; @@ -50,17 +56,18 @@ xfs_errortag_attr_store( { struct xfs_mount *mp = to_mp(kobject); unsigned int error_tag = to_attr(attr)->tag; + unsigned int val; int ret; if (strcmp(buf, "default") == 0) { - mp->m_errortag[error_tag] = - xfs_errortag_random_default[error_tag]; + val = xfs_errortag_random_default[error_tag]; } else { - ret = kstrtouint(buf, 0, &mp->m_errortag[error_tag]); + ret = kstrtouint(buf, 0, &val); if (ret) return ret; } + WRITE_ONCE(mp->m_errortag[error_tag], val); return count; } @@ -71,9 +78,9 @@ xfs_errortag_attr_show( char *buf) { struct xfs_mount *mp = to_mp(kobject); - unsigned int error_tag = to_attr(attr)->tag; - return snprintf(buf, PAGE_SIZE, "%u\n", mp->m_errortag[error_tag]); + return snprintf(buf, PAGE_SIZE, "%u\n", + READ_ONCE(mp->m_errortag[to_attr(attr)->tag])); } static const struct sysfs_ops xfs_errortag_sysfs_ops = { @@ -114,18 +121,8 @@ int xfs_errortag_init( struct xfs_mount *mp) { - int ret; - - mp->m_errortag = kzalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX, - GFP_KERNEL | __GFP_RETRY_MAYFAIL); - if (!mp->m_errortag) - return -ENOMEM; - - ret = 
xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype, + return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype, &mp->m_kobj, "errortag"); - if (ret) - kfree(mp->m_errortag); - return ret; } void @@ -133,33 +130,6 @@ xfs_errortag_del( struct xfs_mount *mp) { xfs_sysfs_del(&mp->m_errortag_kobj); - kfree(mp->m_errortag); -} - -static bool -xfs_errortag_valid( - unsigned int error_tag) -{ - if (error_tag >= XFS_ERRTAG_MAX) - return false; - - /* Error out removed injection types */ - if (error_tag == XFS_ERRTAG_DROP_WRITES) - return false; - return true; -} - -bool -xfs_errortag_enabled( - struct xfs_mount *mp, - unsigned int tag) -{ - if (!mp->m_errortag) - return false; - if (!xfs_errortag_valid(tag)) - return false; - - return mp->m_errortag[tag] != 0; } bool @@ -171,21 +141,7 @@ xfs_errortag_test( { unsigned int randfactor; - /* - * To be able to use error injection anywhere, we need to ensure error - * injection mechanism is already initialized. - * - * Code paths like I/O completion can be called before the - * initialization is complete, but be able to inject errors in such - * places is still useful. 
- */ - if (!mp->m_errortag) - return false; - - if (!xfs_errortag_valid(error_tag)) - return false; - - randfactor = mp->m_errortag[error_tag]; + randfactor = READ_ONCE(mp->m_errortag[error_tag]); if (!randfactor || get_random_u32_below(randfactor)) return false; @@ -195,6 +151,27 @@ xfs_errortag_test( return true; } +void +xfs_errortag_delay( + struct xfs_mount *mp, + const char *file, + int line, + unsigned int error_tag) +{ + unsigned int delay = READ_ONCE(mp->m_errortag[error_tag]); + + might_sleep(); + + if (!delay) + return; + + xfs_warn_ratelimited(mp, +"Injecting %ums delay at file %s, line %d, on filesystem \"%s\"", + delay, file, line, + mp->m_super->s_id); + mdelay(delay); +} + int xfs_errortag_add( struct xfs_mount *mp, @@ -202,17 +179,60 @@ xfs_errortag_add( { BUILD_BUG_ON(ARRAY_SIZE(xfs_errortag_random_default) != XFS_ERRTAG_MAX); - if (!xfs_errortag_valid(error_tag)) + if (error_tag >= XFS_ERRTAG_MAX) + return -EINVAL; + + /* Error out removed injection types */ + switch (error_tag) { + case XFS_ERRTAG_DROP_WRITES: return -EINVAL; - mp->m_errortag[error_tag] = xfs_errortag_random_default[error_tag]; + default: + break; + } + + WRITE_ONCE(mp->m_errortag[error_tag], + xfs_errortag_random_default[error_tag]); return 0; } int +xfs_errortag_add_name( + struct xfs_mount *mp, + const char *tag_name) +{ + unsigned int i; + + for (i = 0; i < XFS_ERRTAG_MAX; i++) { + if (xfs_errortag_names[i] && + !strcmp(xfs_errortag_names[i], tag_name)) + return xfs_errortag_add(mp, i); + } + + return -EINVAL; +} + +void +xfs_errortag_copy( + struct xfs_mount *dst_mp, + struct xfs_mount *src_mp) +{ + unsigned int val, i; + + for (i = 0; i < XFS_ERRTAG_MAX; i++) { + val = READ_ONCE(src_mp->m_errortag[i]); + if (val) + WRITE_ONCE(dst_mp->m_errortag[i], val); + } +} + +int xfs_errortag_clearall( struct xfs_mount *mp) { - memset(mp->m_errortag, 0, sizeof(unsigned int) * XFS_ERRTAG_MAX); + unsigned int i; + + for (i = 0; i < XFS_ERRTAG_MAX; i++) + WRITE_ONCE(mp->m_errortag[i], 
0); return 0; } #endif /* DEBUG */ diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index fe6a71bbe9cd..05fc1d1cf521 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -40,28 +40,23 @@ bool xfs_errortag_test(struct xfs_mount *mp, const char *file, int line, unsigned int error_tag); #define XFS_TEST_ERROR(mp, tag) \ xfs_errortag_test((mp), __FILE__, __LINE__, (tag)) -bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag); +void xfs_errortag_delay(struct xfs_mount *mp, const char *file, int line, + unsigned int error_tag); #define XFS_ERRORTAG_DELAY(mp, tag) \ - do { \ - might_sleep(); \ - if (!xfs_errortag_enabled((mp), (tag))) \ - break; \ - xfs_warn_ratelimited((mp), \ -"Injecting %ums delay at file %s, line %d, on filesystem \"%s\"", \ - (mp)->m_errortag[(tag)], __FILE__, __LINE__, \ - (mp)->m_super->s_id); \ - mdelay((mp)->m_errortag[(tag)]); \ - } while (0) - + xfs_errortag_delay((mp), __FILE__, __LINE__, (tag)) int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag); +int xfs_errortag_add_name(struct xfs_mount *mp, const char *tag_name); +void xfs_errortag_copy(struct xfs_mount *dst_mp, struct xfs_mount *src_mp); int xfs_errortag_clearall(struct xfs_mount *mp); #else #define xfs_errortag_init(mp) (0) #define xfs_errortag_del(mp) #define XFS_TEST_ERROR(mp, tag) (false) #define XFS_ERRORTAG_DELAY(mp, tag) ((void)0) -#define xfs_errortag_add(mp, tag) (ENOSYS) -#define xfs_errortag_clearall(mp) (ENOSYS) +#define xfs_errortag_add(mp, tag) (-ENOSYS) +#define xfs_errortag_copy(dst_mp, src_mp) ((void)0) +#define xfs_errortag_add_name(mp, tag_name) (-ENOSYS) +#define xfs_errortag_clearall(mp) (-ENOSYS) #endif /* DEBUG */ /* diff --git a/fs/xfs/xfs_exchmaps_item.c b/fs/xfs/xfs_exchmaps_item.c index 229cbe0adf17..13a42467370f 100644 --- a/fs/xfs/xfs_exchmaps_item.c +++ b/fs/xfs/xfs_exchmaps_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" @@ -83,16 +83,14 @@ xfs_xmi_item_size( STATIC void xfs_xmi_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; xmi_lip->xmi_format.xmi_type = XFS_LI_XMI; xmi_lip->xmi_format.xmi_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMI_FORMAT, - &xmi_lip->xmi_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_XMI_FORMAT, &xmi_lip->xmi_format, sizeof(struct xfs_xmi_log_format)); } @@ -166,15 +164,14 @@ xfs_xmd_item_size( STATIC void xfs_xmd_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_xmd_log_item *xmd_lip = XMD_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; xmd_lip->xmd_format.xmd_type = XFS_LI_XMD; xmd_lip->xmd_format.xmd_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMD_FORMAT, &xmd_lip->xmd_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_XMD_FORMAT, &xmd_lip->xmd_format, sizeof(struct xfs_xmd_log_format)); } diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index 0b41bdfecdfb..5c083f29ea65 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 201489d3de08..e3e3c3c89840 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index da3161572735..cfecb2959472 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -5,7 +5,7 @@ * Copyright (c) 2011 Christoph Hellwig. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 418ddab590e0..749a4eb9793c 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" @@ -98,10 +98,9 @@ unsigned int xfs_efi_log_space(unsigned int nr) STATIC void xfs_efi_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_efi_log_item *efip = EFI_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; ASSERT(atomic_read(&efip->efi_next_extent) == efip->efi_format.efi_nextents); @@ -110,7 +109,7 @@ xfs_efi_item_format( efip->efi_format.efi_type = lip->li_type; efip->efi_format.efi_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format, xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents)); } @@ -277,10 +276,9 @@ unsigned int xfs_efd_log_space(unsigned int nr) STATIC void xfs_efd_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_efd_log_item *efdp = EFD_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents); ASSERT(lip->li_type == XFS_LI_EFD || lip->li_type == XFS_LI_EFD_RT); @@ -288,7 +286,7 @@ xfs_efd_item_format( 
efdp->efd_format.efd_type = lip->li_type; efdp->efd_format.efd_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format, xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents)); } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index ecd7bf42446b..04176aae6997 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 044918fbae06..44e1b14069a3 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -4,7 +4,7 @@ * Copyright (c) 2014 Christoph Hellwig. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index af68c7de8ee8..098c2b50bc6f 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -3,7 +3,7 @@ * Copyright (C) 2017 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index b7c21f68edc7..17255c41786b 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -25,6 +25,7 @@ #include "xfs_rtrmap_btree.h" #include "xfs_rtrefcount_btree.h" #include "xfs_metafile.h" +#include "xfs_healthmon.h" #include <linux/fserror.h> @@ -544,6 +545,7 @@ xfs_do_force_shutdown( xfs_stack_trace(); fserror_report_shutdown(mp->m_super, GFP_KERNEL); + xfs_healthmon_report_shutdown(mp, flags); } /* diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c index 566fd663c95b..60efe8246304 100644 --- a/fs/xfs/xfs_globals.c +++ b/fs/xfs/xfs_globals.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_error.h" /* diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c index 5a3e3bf4e7cc..d1291ca15239 100644 --- a/fs/xfs/xfs_handle.c +++ b/fs/xfs/xfs_handle.c @@ -4,7 +4,7 @@ * Copyright (c) 2022-2024 Oracle. * All rights reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index fbb8886c72fe..169123772cb3 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -3,7 +3,7 @@ * Copyright (C) 2019 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <darrick.wong@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -19,6 +19,7 @@ #include "xfs_da_btree.h" #include "xfs_quota_defs.h" #include "xfs_rtgroup.h" +#include "xfs_healthmon.h" #include <linux/fserror.h> @@ -107,14 +108,19 @@ xfs_fs_mark_sick( struct xfs_mount *mp, unsigned int mask) { + unsigned int old_mask; + ASSERT(!(mask & ~XFS_SICK_FS_ALL)); trace_xfs_fs_mark_sick(mp, mask); spin_lock(&mp->m_sb_lock); + old_mask = mp->m_fs_sick; mp->m_fs_sick |= mask; spin_unlock(&mp->m_sb_lock); fserror_report_metadata(mp->m_super, -EFSCORRUPTED, GFP_NOFS); + if (mask) + xfs_healthmon_report_fs(mp, XFS_HEALTHMON_SICK, old_mask, mask); } /* Mark per-fs metadata as having been checked and found unhealthy by fsck. */ @@ -123,15 +129,21 @@ xfs_fs_mark_corrupt( struct xfs_mount *mp, unsigned int mask) { + unsigned int old_mask; + ASSERT(!(mask & ~XFS_SICK_FS_ALL)); trace_xfs_fs_mark_corrupt(mp, mask); spin_lock(&mp->m_sb_lock); + old_mask = mp->m_fs_sick; mp->m_fs_sick |= mask; mp->m_fs_checked |= mask; spin_unlock(&mp->m_sb_lock); fserror_report_metadata(mp->m_super, -EFSCORRUPTED, GFP_NOFS); + if (mask) + xfs_healthmon_report_fs(mp, XFS_HEALTHMON_CORRUPT, old_mask, + mask); } /* Mark a per-fs metadata healed. */ @@ -140,15 +152,22 @@ xfs_fs_mark_healthy( struct xfs_mount *mp, unsigned int mask) { + unsigned int old_mask; + ASSERT(!(mask & ~XFS_SICK_FS_ALL)); trace_xfs_fs_mark_healthy(mp, mask); spin_lock(&mp->m_sb_lock); + old_mask = mp->m_fs_sick; mp->m_fs_sick &= ~mask; if (!(mp->m_fs_sick & XFS_SICK_FS_PRIMARY)) mp->m_fs_sick &= ~XFS_SICK_FS_SECONDARY; mp->m_fs_checked |= mask; spin_unlock(&mp->m_sb_lock); + + if (mask) + xfs_healthmon_report_fs(mp, XFS_HEALTHMON_HEALTHY, old_mask, + mask); } /* Sample which per-fs metadata are unhealthy. 
*/ @@ -198,14 +217,20 @@ xfs_group_mark_sick( struct xfs_group *xg, unsigned int mask) { + unsigned int old_mask; + xfs_group_check_mask(xg, mask); trace_xfs_group_mark_sick(xg, mask); spin_lock(&xg->xg_state_lock); + old_mask = xg->xg_sick; xg->xg_sick |= mask; spin_unlock(&xg->xg_state_lock); fserror_report_metadata(xg->xg_mount->m_super, -EFSCORRUPTED, GFP_NOFS); + if (mask) + xfs_healthmon_report_group(xg, XFS_HEALTHMON_SICK, old_mask, + mask); } /* @@ -216,15 +241,21 @@ xfs_group_mark_corrupt( struct xfs_group *xg, unsigned int mask) { + unsigned int old_mask; + xfs_group_check_mask(xg, mask); trace_xfs_group_mark_corrupt(xg, mask); spin_lock(&xg->xg_state_lock); + old_mask = xg->xg_sick; xg->xg_sick |= mask; xg->xg_checked |= mask; spin_unlock(&xg->xg_state_lock); fserror_report_metadata(xg->xg_mount->m_super, -EFSCORRUPTED, GFP_NOFS); + if (mask) + xfs_healthmon_report_group(xg, XFS_HEALTHMON_CORRUPT, old_mask, + mask); } /* @@ -235,15 +266,22 @@ xfs_group_mark_healthy( struct xfs_group *xg, unsigned int mask) { + unsigned int old_mask; + xfs_group_check_mask(xg, mask); trace_xfs_group_mark_healthy(xg, mask); spin_lock(&xg->xg_state_lock); + old_mask = xg->xg_sick; xg->xg_sick &= ~mask; if (!(xg->xg_sick & XFS_SICK_AG_PRIMARY)) xg->xg_sick &= ~XFS_SICK_AG_SECONDARY; xg->xg_checked |= mask; spin_unlock(&xg->xg_state_lock); + + if (mask) + xfs_healthmon_report_group(xg, XFS_HEALTHMON_HEALTHY, old_mask, + mask); } /* Sample which per-ag metadata are unhealthy. 
*/ @@ -282,10 +320,13 @@ xfs_inode_mark_sick( struct xfs_inode *ip, unsigned int mask) { + unsigned int old_mask; + ASSERT(!(mask & ~XFS_SICK_INO_ALL)); trace_xfs_inode_mark_sick(ip, mask); spin_lock(&ip->i_flags_lock); + old_mask = ip->i_sick; ip->i_sick |= mask; spin_unlock(&ip->i_flags_lock); @@ -299,6 +340,9 @@ xfs_inode_mark_sick( spin_unlock(&VFS_I(ip)->i_lock); fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS); + if (mask) + xfs_healthmon_report_inode(ip, XFS_HEALTHMON_SICK, old_mask, + mask); } /* Mark inode metadata as having been checked and found unhealthy by fsck. */ @@ -307,10 +351,13 @@ xfs_inode_mark_corrupt( struct xfs_inode *ip, unsigned int mask) { + unsigned int old_mask; + ASSERT(!(mask & ~XFS_SICK_INO_ALL)); trace_xfs_inode_mark_corrupt(ip, mask); spin_lock(&ip->i_flags_lock); + old_mask = ip->i_sick; ip->i_sick |= mask; ip->i_checked |= mask; spin_unlock(&ip->i_flags_lock); @@ -325,6 +372,9 @@ xfs_inode_mark_corrupt( spin_unlock(&VFS_I(ip)->i_lock); fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS); + if (mask) + xfs_healthmon_report_inode(ip, XFS_HEALTHMON_CORRUPT, old_mask, + mask); } /* Mark parts of an inode healed. */ @@ -333,15 +383,22 @@ xfs_inode_mark_healthy( struct xfs_inode *ip, unsigned int mask) { + unsigned int old_mask; + ASSERT(!(mask & ~XFS_SICK_INO_ALL)); trace_xfs_inode_mark_healthy(ip, mask); spin_lock(&ip->i_flags_lock); + old_mask = ip->i_sick; ip->i_sick &= ~mask; if (!(ip->i_sick & XFS_SICK_INO_PRIMARY)) ip->i_sick &= ~XFS_SICK_INO_SECONDARY; ip->i_checked |= mask; spin_unlock(&ip->i_flags_lock); + + if (mask) + xfs_healthmon_report_inode(ip, XFS_HEALTHMON_HEALTHY, old_mask, + mask); } /* Sample which parts of an inode are unhealthy. */ @@ -421,6 +478,25 @@ xfs_fsop_geom_health( } } +/* + * Translate XFS_SICK_FS_* into XFS_FSOP_GEOM_SICK_* except for the rt free + * space codes, which are sent via the rtgroup events. 
+ */ +unsigned int +xfs_healthmon_fs_mask( + unsigned int sick_mask) +{ + const struct ioctl_sick_map *m; + unsigned int ioctl_mask = 0; + + for_each_sick_map(fs_map, m) { + if (sick_mask & m->sick_mask) + ioctl_mask |= m->ioctl_mask; + } + + return ioctl_mask; +} + static const struct ioctl_sick_map ag_map[] = { { XFS_SICK_AG_SB, XFS_AG_GEOM_SICK_SB }, { XFS_SICK_AG_AGF, XFS_AG_GEOM_SICK_AGF }, @@ -457,6 +533,22 @@ xfs_ag_geom_health( } } +/* Translate XFS_SICK_AG_* into XFS_AG_GEOM_SICK_*. */ +unsigned int +xfs_healthmon_perag_mask( + unsigned int sick_mask) +{ + const struct ioctl_sick_map *m; + unsigned int ioctl_mask = 0; + + for_each_sick_map(ag_map, m) { + if (sick_mask & m->sick_mask) + ioctl_mask |= m->ioctl_mask; + } + + return ioctl_mask; +} + static const struct ioctl_sick_map rtgroup_map[] = { { XFS_SICK_RG_SUPER, XFS_RTGROUP_GEOM_SICK_SUPER }, { XFS_SICK_RG_BITMAP, XFS_RTGROUP_GEOM_SICK_BITMAP }, @@ -487,6 +579,22 @@ xfs_rtgroup_geom_health( } } +/* Translate XFS_SICK_RG_* into XFS_RTGROUP_GEOM_SICK_*. */ +unsigned int +xfs_healthmon_rtgroup_mask( + unsigned int sick_mask) +{ + const struct ioctl_sick_map *m; + unsigned int ioctl_mask = 0; + + for_each_sick_map(rtgroup_map, m) { + if (sick_mask & m->sick_mask) + ioctl_mask |= m->ioctl_mask; + } + + return ioctl_mask; +} + static const struct ioctl_sick_map ino_map[] = { { XFS_SICK_INO_CORE, XFS_BS_SICK_INODE }, { XFS_SICK_INO_BMBTD, XFS_BS_SICK_BMBTD }, @@ -525,6 +633,22 @@ xfs_bulkstat_health( } } +/* Translate XFS_SICK_INO_* into XFS_BS_SICK_*. */ +unsigned int +xfs_healthmon_inode_mask( + unsigned int sick_mask) +{ + const struct ioctl_sick_map *m; + unsigned int ioctl_mask = 0; + + for_each_sick_map(ino_map, m) { + if (sick_mask & m->sick_mask) + ioctl_mask |= m->ioctl_mask; + } + + return ioctl_mask; +} + /* Mark a block mapping sick. 
*/ void xfs_bmap_mark_sick( diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c new file mode 100644 index 000000000000..ca7352dcd182 --- /dev/null +++ b/fs/xfs/xfs_healthmon.c @@ -0,0 +1,1255 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2024-2026 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs_platform.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_trace.h" +#include "xfs_ag.h" +#include "xfs_btree.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_quota_defs.h" +#include "xfs_rtgroup.h" +#include "xfs_health.h" +#include "xfs_healthmon.h" +#include "xfs_fsops.h" +#include "xfs_notify_failure.h" +#include "xfs_file.h" +#include "xfs_ioctl.h" + +#include <linux/anon_inodes.h> +#include <linux/eventpoll.h> +#include <linux/poll.h> +#include <linux/fserror.h> + +/* + * Live Health Monitoring + * ====================== + * + * Autonomous self-healing of XFS filesystems requires a means for the kernel + * to send filesystem health events to a monitoring daemon in userspace. To + * accomplish this, we establish a thread_with_file kthread object to handle + * translating internal events about filesystem health into a format that can + * be parsed easily by userspace. When those internal events occur, the core + * filesystem code calls this health monitor to convey the events to userspace. + * Userspace reads events from the file descriptor returned by the ioctl. + * + * The healthmon abstraction has a weak reference to the host filesystem mount + * so that the queueing and processing of the events do not pin the mount and + * cannot slow down the main filesystem. The healthmon object can exist past + * the end of the filesystem mount. 
+ */ + +/* sign of a detached health monitor */ +#define DETACHED_MOUNT_COOKIE ((uintptr_t)0) + +/* Constrain the number of event objects that can build up in memory. */ +#define XFS_HEALTHMON_MAX_EVENTS (SZ_32K / \ + sizeof(struct xfs_healthmon_event)) + +/* Constrain the size of the output buffer for read_iter. */ +#define XFS_HEALTHMON_MAX_OUTBUF SZ_64K + +/* spinlock for atomically updating xfs_mount <-> xfs_healthmon pointers */ +static DEFINE_SPINLOCK(xfs_healthmon_lock); + +/* Grab a reference to the healthmon object for a given mount, if any. */ +static struct xfs_healthmon * +xfs_healthmon_get( + struct xfs_mount *mp) +{ + struct xfs_healthmon *hm; + + rcu_read_lock(); + hm = mp->m_healthmon; + if (hm && !refcount_inc_not_zero(&hm->ref)) + hm = NULL; + rcu_read_unlock(); + + return hm; +} + +/* + * Release the reference to a healthmon object. If there are no more holders, + * free the health monitor after an RCU grace period to eliminate possibility + * of races with xfs_healthmon_get. + */ +static void +xfs_healthmon_put( + struct xfs_healthmon *hm) +{ + if (refcount_dec_and_test(&hm->ref)) { + struct xfs_healthmon_event *event; + struct xfs_healthmon_event *next = hm->first_event; + + while ((event = next) != NULL) { + trace_xfs_healthmon_drop(hm, event); + next = event->next; + kfree(event); + } + + kfree(hm->unmount_event); + kfree(hm->buffer); + mutex_destroy(&hm->lock); + kfree_rcu_mightsleep(hm); + } +} + +/* Attach a health monitor to an xfs_mount. Only one allowed at a time. */ +STATIC int +xfs_healthmon_attach( + struct xfs_mount *mp, + struct xfs_healthmon *hm) +{ + spin_lock(&xfs_healthmon_lock); + if (mp->m_healthmon != NULL) { + spin_unlock(&xfs_healthmon_lock); + return -EEXIST; + } + + refcount_inc(&hm->ref); + mp->m_healthmon = hm; + hm->mount_cookie = (uintptr_t)mp->m_super; + spin_unlock(&xfs_healthmon_lock); + + return 0; +} + +/* Detach a xfs mount from a specific healthmon instance. 
*/ +STATIC void +xfs_healthmon_detach( + struct xfs_healthmon *hm) +{ + spin_lock(&xfs_healthmon_lock); + if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) { + spin_unlock(&xfs_healthmon_lock); + return; + } + + XFS_M((struct super_block *)hm->mount_cookie)->m_healthmon = NULL; + hm->mount_cookie = DETACHED_MOUNT_COOKIE; + spin_unlock(&xfs_healthmon_lock); + + trace_xfs_healthmon_detach(hm); + xfs_healthmon_put(hm); +} + +static inline void xfs_healthmon_bump_events(struct xfs_healthmon *hm) +{ + hm->events++; + hm->total_events++; +} + +static inline void xfs_healthmon_bump_lost(struct xfs_healthmon *hm) +{ + hm->lost_prev_event++; + hm->total_lost++; +} + +/* + * If possible, merge a new event into an existing event. Returns whether or + * not it merged anything. + */ +static bool +xfs_healthmon_merge_events( + struct xfs_healthmon_event *existing, + const struct xfs_healthmon_event *new) +{ + if (!existing) + return false; + + /* type and domain must match to merge events */ + if (existing->type != new->type || + existing->domain != new->domain) + return false; + + switch (existing->type) { + case XFS_HEALTHMON_RUNNING: + case XFS_HEALTHMON_UNMOUNT: + /* should only ever be one of these events anyway */ + return false; + + case XFS_HEALTHMON_LOST: + existing->lostcount += new->lostcount; + return true; + + case XFS_HEALTHMON_SICK: + case XFS_HEALTHMON_CORRUPT: + case XFS_HEALTHMON_HEALTHY: + switch (existing->domain) { + case XFS_HEALTHMON_FS: + existing->fsmask |= new->fsmask; + return true; + case XFS_HEALTHMON_AG: + case XFS_HEALTHMON_RTGROUP: + if (existing->group == new->group){ + existing->grpmask |= new->grpmask; + return true; + } + return false; + case XFS_HEALTHMON_INODE: + if (existing->ino == new->ino && + existing->gen == new->gen) { + existing->imask |= new->imask; + return true; + } + return false; + default: + ASSERT(0); + return false; + } + return false; + + case XFS_HEALTHMON_SHUTDOWN: + /* yes, we can race to shutdown */ + existing->flags |= 
new->flags; + return true; + + case XFS_HEALTHMON_MEDIA_ERROR: + /* physically adjacent errors can merge */ + if (existing->daddr + existing->bbcount == new->daddr) { + existing->bbcount += new->bbcount; + return true; + } + if (new->daddr + new->bbcount == existing->daddr) { + existing->daddr = new->daddr; + existing->bbcount += new->bbcount; + return true; + } + return false; + + case XFS_HEALTHMON_BUFREAD: + case XFS_HEALTHMON_BUFWRITE: + case XFS_HEALTHMON_DIOREAD: + case XFS_HEALTHMON_DIOWRITE: + case XFS_HEALTHMON_DATALOST: + /* logically adjacent file ranges can merge */ + if (existing->fino != new->fino || existing->fgen != new->fgen) + return false; + + if (existing->fpos + existing->flen == new->fpos) { + existing->flen += new->flen; + return true; + } + + if (new->fpos + new->flen == existing->fpos) { + existing->fpos = new->fpos; + existing->flen += new->flen; + return true; + } + return false; + } + + return false; +} + +/* + * Insert an event onto the start of the queue. The event is timestamped, + * becomes the new head of the list (and also the tail if the queue was + * empty), the event counters are bumped, and waiters are woken. + */ +static inline void +__xfs_healthmon_insert( + struct xfs_healthmon *hm, + struct xfs_healthmon_event *event) +{ + struct timespec64 now; + + ktime_get_coarse_real_ts64(&now); + event->time_ns = (now.tv_sec * NSEC_PER_SEC) + now.tv_nsec; + + /* + * Link in front of the current head and always repoint first_event. + * Only updating first_event when the queue is empty would leave the + * new event unreachable from a non-empty list. + */ + event->next = hm->first_event; + hm->first_event = event; + if (!hm->last_event) + hm->last_event = event; + xfs_healthmon_bump_events(hm); + wake_up(&hm->wait); + + trace_xfs_healthmon_insert(hm, event); +} + +/* Push an event onto the end of the queue. 
*/ +static inline void +__xfs_healthmon_push( + struct xfs_healthmon *hm, + struct xfs_healthmon_event *event) +{ + struct timespec64 now; + + ktime_get_coarse_real_ts64(&now); + event->time_ns = (now.tv_sec * NSEC_PER_SEC) + now.tv_nsec; + + if (!hm->first_event) + hm->first_event = event; + if (hm->last_event) + hm->last_event->next = event; + hm->last_event = event; + event->next = NULL; + xfs_healthmon_bump_events(hm); + wake_up(&hm->wait); + + trace_xfs_healthmon_push(hm, event); +} + +/* Deal with any previously lost events */ +static int +xfs_healthmon_clear_lost_prev( + struct xfs_healthmon *hm) +{ + struct xfs_healthmon_event lost_event = { + .type = XFS_HEALTHMON_LOST, + .domain = XFS_HEALTHMON_MOUNT, + .lostcount = hm->lost_prev_event, + }; + struct xfs_healthmon_event *event = NULL; + + if (xfs_healthmon_merge_events(hm->last_event, &lost_event)) { + trace_xfs_healthmon_merge(hm, hm->last_event); + wake_up(&hm->wait); + goto cleared; + } + + if (hm->events < XFS_HEALTHMON_MAX_EVENTS) + event = kmemdup(&lost_event, sizeof(struct xfs_healthmon_event), + GFP_NOFS); + if (!event) + return -ENOMEM; + + __xfs_healthmon_push(hm, event); +cleared: + hm->lost_prev_event = 0; + return 0; +} + +/* + * Push an event onto the end of the list after dealing with lost events and + * possibly full queues. + */ +STATIC int +xfs_healthmon_push( + struct xfs_healthmon *hm, + const struct xfs_healthmon_event *template) +{ + struct xfs_healthmon_event *event = NULL; + int error = 0; + + /* + * Locklessly check if the health monitor has already detached from the + * mount. If so, ignore the event. If we race with deactivation, + * we'll queue the event but never send it. 
+ */ + if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) + return -ESHUTDOWN; + + mutex_lock(&hm->lock); + + /* Report previously lost events before we do anything else */ + if (hm->lost_prev_event) { + error = xfs_healthmon_clear_lost_prev(hm); + if (error) { + /* + * The lost-event report could not be queued, so this + * event is dropped too. Count it so that the eventual + * lost-event report covers it as well. + */ + trace_xfs_healthmon_lost_event(hm); + xfs_healthmon_bump_lost(hm); + goto out_unlock; + } + } + + /* Try to merge with the newest event */ + if (xfs_healthmon_merge_events(hm->last_event, template)) { + trace_xfs_healthmon_merge(hm, hm->last_event); + wake_up(&hm->wait); + goto out_unlock; + } + + /* Only create a heap event object if we're not already at capacity. */ + if (hm->events < XFS_HEALTHMON_MAX_EVENTS) + event = kmemdup(template, sizeof(struct xfs_healthmon_event), + GFP_NOFS); + if (!event) { + /* No memory means we lose the event */ + trace_xfs_healthmon_lost_event(hm); + xfs_healthmon_bump_lost(hm); + error = -ENOMEM; + goto out_unlock; + } + + __xfs_healthmon_push(hm, event); + +out_unlock: + mutex_unlock(&hm->lock); + return error; +} + +/* + * Report that the filesystem is being unmounted, then detach the xfs mount + * from this healthmon instance. + */ +void +xfs_healthmon_unmount( + struct xfs_mount *mp) +{ + struct xfs_healthmon *hm = xfs_healthmon_get(mp); + + if (!hm) + return; + + trace_xfs_healthmon_report_unmount(hm); + + /* + * Insert the unmount notification at the start of the event queue so + * that userspace knows the filesystem went away as soon as possible. + * There's nothing actionable for userspace after an unmount. Once + * we've inserted the unmount event, hm no longer owns that event. + * + * NOTE(review): xfs_healthmon_push modifies the queue under hm->lock, + * but this insertion does not take that lock -- confirm that it cannot + * race with a concurrent push. + */ + __xfs_healthmon_insert(hm, hm->unmount_event); + hm->unmount_event = NULL; + + xfs_healthmon_detach(hm); + xfs_healthmon_put(hm); +} + +/* Compute the reporting mask for non-unmount metadata health events. */ +static inline unsigned int +metadata_event_mask( + struct xfs_healthmon *hm, + enum xfs_healthmon_type type, + unsigned int old_mask, + unsigned int new_mask) +{ + /* If we want all events, return all events. 
*/ + if (hm->verbose) + return new_mask; + + switch (type) { + case XFS_HEALTHMON_SICK: + /* Always report runtime corruptions */ + return new_mask; + case XFS_HEALTHMON_CORRUPT: + /* Only report new fsck errors */ + return new_mask & ~old_mask; + case XFS_HEALTHMON_HEALTHY: + /* Only report healthy metadata that got fixed */ + return new_mask & old_mask; + default: + ASSERT(0); + break; + } + + return 0; +} + +/* + * Report XFS_SICK_FS_* flags to healthmon. The mask is filtered through + * metadata_event_mask() and the XFS_SICK_FS_SECONDARY bits are stripped; + * nothing is queued if the result is empty or no health monitor is attached. + */ +void +xfs_healthmon_report_fs( + struct xfs_mount *mp, + enum xfs_healthmon_type type, + unsigned int old_mask, + unsigned int new_mask) +{ + struct xfs_healthmon_event event = { + .type = type, + .domain = XFS_HEALTHMON_FS, + }; + struct xfs_healthmon *hm = xfs_healthmon_get(mp); + + if (!hm) + return; + + event.fsmask = metadata_event_mask(hm, type, old_mask, new_mask) & + ~XFS_SICK_FS_SECONDARY; + trace_xfs_healthmon_report_fs(hm, old_mask, new_mask, &event); + + if (event.fsmask) + xfs_healthmon_push(hm, &event); + + xfs_healthmon_put(hm); +} + +/* + * Report XFS_SICK_(AG|RG)* flags to healthmon. The event domain and the + * secondary bits that get stripped are chosen from the group type. + */ +void +xfs_healthmon_report_group( + struct xfs_group *xg, + enum xfs_healthmon_type type, + unsigned int old_mask, + unsigned int new_mask) +{ + struct xfs_healthmon_event event = { + .type = type, + .group = xg->xg_gno, + }; + struct xfs_healthmon *hm = xfs_healthmon_get(xg->xg_mount); + + if (!hm) + return; + + switch (xg->xg_type) { + case XG_TYPE_RTG: + event.domain = XFS_HEALTHMON_RTGROUP; + event.grpmask = metadata_event_mask(hm, type, old_mask, + new_mask) & + ~XFS_SICK_RG_SECONDARY; + break; + case XG_TYPE_AG: + event.domain = XFS_HEALTHMON_AG; + event.grpmask = metadata_event_mask(hm, type, old_mask, + new_mask) & + ~XFS_SICK_AG_SECONDARY; + break; + default: + /* grpmask keeps its zero initializer, so nothing is pushed below */ + ASSERT(0); + break; + } + + trace_xfs_healthmon_report_group(hm, old_mask, new_mask, &event); + + if (event.grpmask) + xfs_healthmon_push(hm, &event); + + xfs_healthmon_put(hm); +} + +/* Report XFS_SICK_INO_* flags to healthmon */ +void 
+xfs_healthmon_report_inode( + struct xfs_inode *ip, + enum xfs_healthmon_type type, + unsigned int old_mask, + unsigned int new_mask) +{ + struct xfs_healthmon_event event = { + .type = type, + .domain = XFS_HEALTHMON_INODE, + .ino = ip->i_ino, + .gen = VFS_I(ip)->i_generation, + }; + struct xfs_healthmon *hm = xfs_healthmon_get(ip->i_mount); + + if (!hm) + return; + + event.imask = metadata_event_mask(hm, type, old_mask, new_mask) & + ~XFS_SICK_INO_SECONDARY; + trace_xfs_healthmon_report_inode(hm, old_mask, event.imask, &event); + + if (event.imask) + xfs_healthmon_push(hm, &event); + + xfs_healthmon_put(hm); +} + +/* Add a shutdown event to the reporting queue. */ +void +xfs_healthmon_report_shutdown( + struct xfs_mount *mp, + uint32_t flags) +{ + struct xfs_healthmon_event event = { + .type = XFS_HEALTHMON_SHUTDOWN, + .domain = XFS_HEALTHMON_MOUNT, + .flags = flags, + }; + struct xfs_healthmon *hm = xfs_healthmon_get(mp); + + if (!hm) + return; + + trace_xfs_healthmon_report_shutdown(hm, flags); + + xfs_healthmon_push(hm, &event); + xfs_healthmon_put(hm); +} + +static inline enum xfs_healthmon_domain +media_error_domain( + enum xfs_device fdev) +{ + switch (fdev) { + case XFS_DEV_DATA: + return XFS_HEALTHMON_DATADEV; + case XFS_DEV_LOG: + return XFS_HEALTHMON_LOGDEV; + case XFS_DEV_RT: + return XFS_HEALTHMON_RTDEV; + } + + ASSERT(0); + return 0; +} + +/* Add a media error event to the reporting queue. 
*/ +void +xfs_healthmon_report_media( + struct xfs_mount *mp, + enum xfs_device fdev, + xfs_daddr_t daddr, + uint64_t bbcount) +{ + struct xfs_healthmon_event event = { + .type = XFS_HEALTHMON_MEDIA_ERROR, + .domain = media_error_domain(fdev), + .daddr = daddr, + .bbcount = bbcount, + }; + struct xfs_healthmon *hm = xfs_healthmon_get(mp); + + if (!hm) + return; + + trace_xfs_healthmon_report_media(hm, fdev, &event); + + xfs_healthmon_push(hm, &event); + xfs_healthmon_put(hm); +} + +static inline enum xfs_healthmon_type file_ioerr_type(enum fserror_type action) +{ + switch (action) { + case FSERR_BUFFERED_READ: + return XFS_HEALTHMON_BUFREAD; + case FSERR_BUFFERED_WRITE: + return XFS_HEALTHMON_BUFWRITE; + case FSERR_DIRECTIO_READ: + return XFS_HEALTHMON_DIOREAD; + case FSERR_DIRECTIO_WRITE: + return XFS_HEALTHMON_DIOWRITE; + case FSERR_DATA_LOST: + return XFS_HEALTHMON_DATALOST; + case FSERR_METADATA: + /* filtered out by xfs_fs_report_error */ + break; + } + + ASSERT(0); + return -1; +} + +/* Add a file io error event to the reporting queue. 
*/ +void +xfs_healthmon_report_file_ioerror( + struct xfs_inode *ip, + const struct fserror_event *p) +{ + struct xfs_healthmon_event event = { + .type = file_ioerr_type(p->type), + .domain = XFS_HEALTHMON_FILERANGE, + .fino = ip->i_ino, + .fgen = VFS_I(ip)->i_generation, + .fpos = p->pos, + .flen = p->len, + /* send positive error number to userspace */ + .error = -p->error, + }; + struct xfs_healthmon *hm = xfs_healthmon_get(ip->i_mount); + + if (!hm) + return; + + trace_xfs_healthmon_report_file_ioerror(hm, p); + + xfs_healthmon_push(hm, &event); + xfs_healthmon_put(hm); +} + +static inline void +xfs_healthmon_reset_outbuf( + struct xfs_healthmon *hm) +{ + hm->buftail = 0; + hm->bufhead = 0; +} + +struct flags_map { + unsigned int in_mask; + unsigned int out_mask; +}; + +static const struct flags_map shutdown_map[] = { + { SHUTDOWN_META_IO_ERROR, XFS_HEALTH_SHUTDOWN_META_IO_ERROR }, + { SHUTDOWN_LOG_IO_ERROR, XFS_HEALTH_SHUTDOWN_LOG_IO_ERROR }, + { SHUTDOWN_FORCE_UMOUNT, XFS_HEALTH_SHUTDOWN_FORCE_UMOUNT }, + { SHUTDOWN_CORRUPT_INCORE, XFS_HEALTH_SHUTDOWN_CORRUPT_INCORE }, + { SHUTDOWN_CORRUPT_ONDISK, XFS_HEALTH_SHUTDOWN_CORRUPT_ONDISK }, + { SHUTDOWN_DEVICE_REMOVED, XFS_HEALTH_SHUTDOWN_DEVICE_REMOVED }, +}; + +static inline unsigned int +__map_flags( + const struct flags_map *map, + size_t array_len, + unsigned int flags) +{ + const struct flags_map *m; + unsigned int ret = 0; + + for (m = map; m < map + array_len; m++) { + if (flags & m->in_mask) + ret |= m->out_mask; + } + + return ret; +} + +#define map_flags(map, flags) __map_flags((map), ARRAY_SIZE(map), (flags)) + +static inline unsigned int shutdown_mask(unsigned int in) +{ + return map_flags(shutdown_map, in); +} + +static const unsigned int domain_map[] = { + [XFS_HEALTHMON_MOUNT] = XFS_HEALTH_MONITOR_DOMAIN_MOUNT, + [XFS_HEALTHMON_FS] = XFS_HEALTH_MONITOR_DOMAIN_FS, + [XFS_HEALTHMON_AG] = XFS_HEALTH_MONITOR_DOMAIN_AG, + [XFS_HEALTHMON_INODE] = XFS_HEALTH_MONITOR_DOMAIN_INODE, + [XFS_HEALTHMON_RTGROUP] 
= XFS_HEALTH_MONITOR_DOMAIN_RTGROUP, + [XFS_HEALTHMON_DATADEV] = XFS_HEALTH_MONITOR_DOMAIN_DATADEV, + [XFS_HEALTHMON_RTDEV] = XFS_HEALTH_MONITOR_DOMAIN_RTDEV, + [XFS_HEALTHMON_LOGDEV] = XFS_HEALTH_MONITOR_DOMAIN_LOGDEV, + [XFS_HEALTHMON_FILERANGE] = XFS_HEALTH_MONITOR_DOMAIN_FILERANGE, +}; + +static const unsigned int type_map[] = { + [XFS_HEALTHMON_RUNNING] = XFS_HEALTH_MONITOR_TYPE_RUNNING, + [XFS_HEALTHMON_LOST] = XFS_HEALTH_MONITOR_TYPE_LOST, + [XFS_HEALTHMON_SICK] = XFS_HEALTH_MONITOR_TYPE_SICK, + [XFS_HEALTHMON_CORRUPT] = XFS_HEALTH_MONITOR_TYPE_CORRUPT, + [XFS_HEALTHMON_HEALTHY] = XFS_HEALTH_MONITOR_TYPE_HEALTHY, + [XFS_HEALTHMON_UNMOUNT] = XFS_HEALTH_MONITOR_TYPE_UNMOUNT, + [XFS_HEALTHMON_SHUTDOWN] = XFS_HEALTH_MONITOR_TYPE_SHUTDOWN, + [XFS_HEALTHMON_MEDIA_ERROR] = XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR, + [XFS_HEALTHMON_BUFREAD] = XFS_HEALTH_MONITOR_TYPE_BUFREAD, + [XFS_HEALTHMON_BUFWRITE] = XFS_HEALTH_MONITOR_TYPE_BUFWRITE, + [XFS_HEALTHMON_DIOREAD] = XFS_HEALTH_MONITOR_TYPE_DIOREAD, + [XFS_HEALTHMON_DIOWRITE] = XFS_HEALTH_MONITOR_TYPE_DIOWRITE, + [XFS_HEALTHMON_DATALOST] = XFS_HEALTH_MONITOR_TYPE_DATALOST, +}; + +/* Render event as a V0 structure */ +STATIC int +xfs_healthmon_format_v0( + struct xfs_healthmon *hm, + const struct xfs_healthmon_event *event) +{ + struct xfs_health_monitor_event hme = { + .time_ns = event->time_ns, + }; + + trace_xfs_healthmon_format(hm, event); + + if (event->domain < 0 || event->domain >= ARRAY_SIZE(domain_map) || + event->type < 0 || event->type >= ARRAY_SIZE(type_map)) + return -EFSCORRUPTED; + + hme.domain = domain_map[event->domain]; + hme.type = type_map[event->type]; + + /* fill in the event-specific details */ + switch (event->domain) { + case XFS_HEALTHMON_MOUNT: + switch (event->type) { + case XFS_HEALTHMON_LOST: + hme.e.lost.count = event->lostcount; + break; + case XFS_HEALTHMON_SHUTDOWN: + hme.e.shutdown.reasons = shutdown_mask(event->flags); + break; + default: + break; + } + break; + case XFS_HEALTHMON_FS: + 
hme.e.fs.mask = xfs_healthmon_fs_mask(event->fsmask); + break; + case XFS_HEALTHMON_RTGROUP: + hme.e.group.mask = xfs_healthmon_rtgroup_mask(event->grpmask); + hme.e.group.gno = event->group; + break; + case XFS_HEALTHMON_AG: + hme.e.group.mask = xfs_healthmon_perag_mask(event->grpmask); + hme.e.group.gno = event->group; + break; + case XFS_HEALTHMON_INODE: + hme.e.inode.mask = xfs_healthmon_inode_mask(event->imask); + hme.e.inode.ino = event->ino; + hme.e.inode.gen = event->gen; + break; + case XFS_HEALTHMON_DATADEV: + case XFS_HEALTHMON_LOGDEV: + case XFS_HEALTHMON_RTDEV: + hme.e.media.daddr = event->daddr; + hme.e.media.bbcount = event->bbcount; + break; + case XFS_HEALTHMON_FILERANGE: + hme.e.filerange.ino = event->fino; + hme.e.filerange.gen = event->fgen; + hme.e.filerange.pos = event->fpos; + hme.e.filerange.len = event->flen; + hme.e.filerange.error = abs(event->error); + break; + default: + break; + } + + ASSERT(hm->bufhead + sizeof(hme) <= hm->bufsize); + + /* copy formatted object to the outbuf */ + if (hm->bufhead + sizeof(hme) <= hm->bufsize) { + memcpy(hm->buffer + hm->bufhead, &hme, sizeof(hme)); + hm->bufhead += sizeof(hme); + } + + return 0; +} + +/* How many bytes are waiting in the outbuf to be copied? */ +static inline size_t +xfs_healthmon_outbuf_bytes( + struct xfs_healthmon *hm) +{ + if (hm->bufhead > hm->buftail) + return hm->bufhead - hm->buftail; + return 0; +} + +/* + * Do we have something for userspace to read? This can mean unmount events, + * events pending in the queue, or pending bytes in the outbuf. + */ +static inline bool +xfs_healthmon_has_eventdata( + struct xfs_healthmon *hm) +{ + /* + * If the health monitor is already detached from the xfs_mount, we + * want reads to return 0 bytes even if there are no events, because + * userspace interprets that as EOF. If we race with deactivation, + * read_iter will take the necessary locks to discover that there are + * no events to send. 
+ */ + if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) + return true; + + /* + * Either there are events waiting to be formatted into the buffer, or + * there's unread bytes in the buffer. + */ + return hm->events > 0 || xfs_healthmon_outbuf_bytes(hm) > 0; +} + +/* Try to copy the rest of the outbuf to the iov iter. */ +STATIC ssize_t +xfs_healthmon_copybuf( + struct xfs_healthmon *hm, + struct iov_iter *to) +{ + size_t to_copy; + size_t w = 0; + + trace_xfs_healthmon_copybuf(hm, to); + + to_copy = xfs_healthmon_outbuf_bytes(hm); + if (to_copy) { + w = copy_to_iter(hm->buffer + hm->buftail, to_copy, to); + if (!w) + return -EFAULT; + + hm->buftail += w; + } + + /* + * Nothing left to copy? Reset the output buffer cursors to the start + * since there's no live data in the buffer. + */ + if (xfs_healthmon_outbuf_bytes(hm) == 0) + xfs_healthmon_reset_outbuf(hm); + return w; +} + +/* + * Return a health monitoring event for formatting into the output buffer if + * there's enough space in the outbuf and an event waiting for us. Caller + * must hold i_rwsem on the healthmon file. 
+ */ +static inline struct xfs_healthmon_event * +xfs_healthmon_format_pop( + struct xfs_healthmon *hm) +{ + struct xfs_healthmon_event *event; + + if (hm->bufhead + sizeof(*event) > hm->bufsize) + return NULL; + + mutex_lock(&hm->lock); + event = hm->first_event; + if (event) { + if (hm->last_event == event) + hm->last_event = NULL; + hm->first_event = event->next; + hm->events--; + + trace_xfs_healthmon_pop(hm, event); + } + mutex_unlock(&hm->lock); + return event; +} + +/* Allocate formatting buffer */ +STATIC int +xfs_healthmon_alloc_outbuf( + struct xfs_healthmon *hm, + size_t user_bufsize) +{ + void *outbuf; + size_t bufsize = + min(XFS_HEALTHMON_MAX_OUTBUF, max(PAGE_SIZE, user_bufsize)); + + outbuf = kzalloc(bufsize, GFP_KERNEL); + if (!outbuf) { + if (bufsize == PAGE_SIZE) + return -ENOMEM; + + bufsize = PAGE_SIZE; + outbuf = kzalloc(bufsize, GFP_KERNEL); + if (!outbuf) + return -ENOMEM; + } + + hm->buffer = outbuf; + hm->bufsize = bufsize; + hm->bufhead = 0; + hm->buftail = 0; + + return 0; +} + +/* + * Convey queued event data to userspace. First copy any remaining bytes in + * the outbuf, then format the oldest event into the outbuf and copy that too. 
+ */ +STATIC ssize_t +xfs_healthmon_read_iter( + struct kiocb *iocb, + struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct xfs_healthmon *hm = file->private_data; + struct xfs_healthmon_event *event; + size_t copied = 0; + ssize_t ret = 0; + + if (file->f_flags & O_NONBLOCK) { + if (!xfs_healthmon_has_eventdata(hm) || !inode_trylock(inode)) + return -EAGAIN; + } else { + ret = wait_event_interruptible(hm->wait, + xfs_healthmon_has_eventdata(hm)); + if (ret) + return ret; + + inode_lock(inode); + } + + if (hm->bufsize == 0) { + ret = xfs_healthmon_alloc_outbuf(hm, iov_iter_count(to)); + if (ret) + goto out_unlock; + } + + trace_xfs_healthmon_read_start(hm); + + /* + * If there's anything left in the output buffer, copy that before + * formatting more events. + */ + ret = xfs_healthmon_copybuf(hm, to); + if (ret < 0) + goto out_unlock; + copied += ret; + + while (iov_iter_count(to) > 0) { + /* Format the next events into the outbuf until it's full. */ + while ((event = xfs_healthmon_format_pop(hm)) != NULL) { + ret = xfs_healthmon_format_v0(hm, event); + kfree(event); + if (ret) + goto out_unlock; + } + + /* Copy anything formatted into outbuf to userspace */ + ret = xfs_healthmon_copybuf(hm, to); + if (ret <= 0) + break; + + copied += ret; + } + +out_unlock: + trace_xfs_healthmon_read_finish(hm); + inode_unlock(inode); + return copied ?: ret; +} + +/* Poll for available events. */ +STATIC __poll_t +xfs_healthmon_poll( + struct file *file, + struct poll_table_struct *wait) +{ + struct xfs_healthmon *hm = file->private_data; + __poll_t mask = 0; + + poll_wait(file, &hm->wait, wait); + + if (xfs_healthmon_has_eventdata(hm)) + mask |= EPOLLIN; + return mask; +} + +/* Free the health monitoring information. 
*/ +STATIC int +xfs_healthmon_release( + struct inode *inode, + struct file *file) +{ + struct xfs_healthmon *hm = file->private_data; + + trace_xfs_healthmon_release(hm); + + /* + * We might be closing the healthmon file before the filesystem + * unmounts, because userspace processes can terminate at any time and + * for any reason. Null out xfs_mount::m_healthmon so that another + * process can create another health monitor file. + */ + xfs_healthmon_detach(hm); + + /* + * Wake up any readers that might be left. There shouldn't be any + * because the only users of the waiter are read and poll. + */ + wake_up_all(&hm->wait); + + xfs_healthmon_put(hm); + return 0; +} + +/* Validate ioctl parameters. */ +static inline bool +xfs_healthmon_validate( + const struct xfs_health_monitor *hmo) +{ + if (hmo->flags & ~XFS_HEALTH_MONITOR_ALL) + return false; + if (hmo->format != XFS_HEALTH_MONITOR_FMT_V0) + return false; + if (memchr_inv(&hmo->pad, 0, sizeof(hmo->pad))) + return false; + return true; +} + +/* Emit some data about the health monitoring fd. */ +static void +xfs_healthmon_show_fdinfo( + struct seq_file *m, + struct file *file) +{ + struct xfs_healthmon *hm = file->private_data; + + mutex_lock(&hm->lock); + seq_printf(m, "state:\t%s\ndev:\t%d:%d\nformat:\tv0\nevents:\t%llu\nlost:\t%llu\n", + hm->mount_cookie == DETACHED_MOUNT_COOKIE ? + "dead" : "alive", + MAJOR(hm->dev), MINOR(hm->dev), + hm->total_events, + hm->total_lost); + mutex_unlock(&hm->lock); +} + +/* Reconfigure the health monitor. 
*/ +STATIC long +xfs_healthmon_reconfigure( + struct file *file, + unsigned int cmd, + void __user *arg) +{ + struct xfs_health_monitor hmo; + struct xfs_healthmon *hm = file->private_data; + + if (copy_from_user(&hmo, arg, sizeof(hmo))) + return -EFAULT; + + if (!xfs_healthmon_validate(&hmo)) + return -EINVAL; + + mutex_lock(&hm->lock); + hm->verbose = !!(hmo.flags & XFS_HEALTH_MONITOR_VERBOSE); + mutex_unlock(&hm->lock); + + return 0; +} + +/* Does the fd point to the same filesystem as the one we're monitoring? */ +STATIC long +xfs_healthmon_file_on_monitored_fs( + struct file *file, + unsigned int cmd, + void __user *arg) +{ + struct xfs_health_file_on_monitored_fs hms; + struct xfs_healthmon *hm = file->private_data; + struct inode *hms_inode; + + if (copy_from_user(&hms, arg, sizeof(hms))) + return -EFAULT; + + if (hms.flags) + return -EINVAL; + + CLASS(fd, hms_fd)(hms.fd); + if (fd_empty(hms_fd)) + return -EBADF; + + hms_inode = file_inode(fd_file(hms_fd)); + mutex_lock(&hm->lock); + if (hm->mount_cookie != (uintptr_t)hms_inode->i_sb) { + mutex_unlock(&hm->lock); + return -ESTALE; + } + + mutex_unlock(&hm->lock); + return 0; +} + +/* Handle ioctls for the health monitoring thread. */ +STATIC long +xfs_healthmon_ioctl( + struct file *file, + unsigned int cmd, + unsigned long p) +{ + void __user *arg = (void __user *)p; + + switch (cmd) { + case XFS_IOC_HEALTH_MONITOR: + return xfs_healthmon_reconfigure(file, cmd, arg); + case XFS_IOC_HEALTH_FD_ON_MONITORED_FS: + return xfs_healthmon_file_on_monitored_fs(file, cmd, arg); + default: + break; + } + + return -ENOTTY; +} + +static const struct file_operations xfs_healthmon_fops = { + .owner = THIS_MODULE, + .show_fdinfo = xfs_healthmon_show_fdinfo, + .read_iter = xfs_healthmon_read_iter, + .poll = xfs_healthmon_poll, + .release = xfs_healthmon_release, + .unlocked_ioctl = xfs_healthmon_ioctl, +}; + +/* + * Create a health monitoring file. Returns an index to the fd table or a + * negative errno. 
+ */ +long +xfs_ioc_health_monitor( + struct file *file, + struct xfs_health_monitor __user *arg) +{ + struct xfs_health_monitor hmo; + struct xfs_healthmon_event *running_event; + struct xfs_healthmon *hm; + struct xfs_inode *ip = XFS_I(file_inode(file)); + struct xfs_mount *mp = ip->i_mount; + int ret; + + /* + * The only intended user of the health monitoring system should be the + * xfs_healer daemon running on behalf of the whole filesystem in the + * initial user namespace. IOWs, we don't allow unprivileged userspace + * (they can use fsnotify) nor do we allow containers. + */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (ip->i_ino != mp->m_sb.sb_rootino) + return -EPERM; + if (current_user_ns() != &init_user_ns) + return -EPERM; + + if (copy_from_user(&hmo, arg, sizeof(hmo))) + return -EFAULT; + + if (!xfs_healthmon_validate(&hmo)) + return -EINVAL; + + hm = kzalloc(sizeof(*hm), GFP_KERNEL); + if (!hm) + return -ENOMEM; + hm->dev = mp->m_super->s_dev; + refcount_set(&hm->ref, 1); + + mutex_init(&hm->lock); + init_waitqueue_head(&hm->wait); + + if (hmo.flags & XFS_HEALTH_MONITOR_VERBOSE) + hm->verbose = true; + + /* Queue up the first event that lets the client know we're running. */ + running_event = kzalloc(sizeof(struct xfs_healthmon_event), GFP_NOFS); + if (!running_event) { + ret = -ENOMEM; + goto out_hm; + } + running_event->type = XFS_HEALTHMON_RUNNING; + running_event->domain = XFS_HEALTHMON_MOUNT; + __xfs_healthmon_insert(hm, running_event); + + /* + * Preallocate the unmount event so that we can't fail to notify the + * filesystem later. This is key for triggering fast exit of the + * xfs_healer daemon. + */ + hm->unmount_event = kzalloc(sizeof(struct xfs_healthmon_event), + GFP_NOFS); + if (!hm->unmount_event) { + ret = -ENOMEM; + goto out_hm; + } + hm->unmount_event->type = XFS_HEALTHMON_UNMOUNT; + hm->unmount_event->domain = XFS_HEALTHMON_MOUNT; + + /* + * Try to attach this health monitor to the xfs_mount. 
The monitor is + * considered live and will receive events if this succeeds. + */ + ret = xfs_healthmon_attach(mp, hm); + if (ret) + goto out_hm; + + /* + * Create the anonymous file and install a fd for it. If it succeeds, + * the file owns hm and can go away at any time, so we must not access + * it again. This must go last because we can't undo a fd table + * installation. + */ + ret = anon_inode_getfd("xfs_healthmon", &xfs_healthmon_fops, hm, + O_CLOEXEC | O_RDONLY); + if (ret < 0) + goto out_mp; + + trace_xfs_healthmon_create(mp->m_super->s_dev, hmo.flags, hmo.format); + + return ret; + +out_mp: + xfs_healthmon_detach(hm); +out_hm: + ASSERT(refcount_read(&hm->ref) == 1); + xfs_healthmon_put(hm); + return ret; +} diff --git a/fs/xfs/xfs_healthmon.h b/fs/xfs/xfs_healthmon.h new file mode 100644 index 000000000000..0e936507037f --- /dev/null +++ b/fs/xfs/xfs_healthmon.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2024-2026 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_HEALTHMON_H__ +#define __XFS_HEALTHMON_H__ + +struct xfs_healthmon { + /* + * Weak reference to the xfs filesystem that is being monitored. It + * will be set to zero when the filesystem detaches from the monitor. + * Do not dereference this pointer. + */ + uintptr_t mount_cookie; + + /* + * Device number of the filesystem being monitored. This is for + * consistent tracing even after unmount. + */ + dev_t dev; + + /* + * Reference count of this structure. The open healthmon fd holds one + * ref, the xfs_mount holds another ref if it points to this object, + * and running event handlers hold their own refs. 
+ */ + refcount_t ref; + + /* lock for event list and event counters */ + struct mutex lock; + + /* list of event objects */ + struct xfs_healthmon_event *first_event; + struct xfs_healthmon_event *last_event; + + /* preallocated event for unmount */ + struct xfs_healthmon_event *unmount_event; + + /* number of events in the list */ + unsigned int events; + + /* do we want all events? */ + bool verbose:1; + + /* waiter so read/poll can sleep until the arrival of events */ + struct wait_queue_head wait; + + /* + * Buffer for formatting events for a read_iter call. Events are + * formatted into the buffer at bufhead, and buftail determines where + * to start a copy_iter to get those events to userspace. All buffer + * fields are protected by inode_lock. + */ + char *buffer; + size_t bufsize; + size_t bufhead; + size_t buftail; + + /* did we lose previous events? */ + unsigned long long lost_prev_event; + + /* total counts of events observed and lost events */ + unsigned long long total_events; + unsigned long long total_lost; +}; + +void xfs_healthmon_unmount(struct xfs_mount *mp); + +enum xfs_healthmon_type { + XFS_HEALTHMON_RUNNING, /* monitor running */ + XFS_HEALTHMON_LOST, /* message lost */ + XFS_HEALTHMON_UNMOUNT, /* filesystem is unmounting */ + + /* filesystem shutdown */ + XFS_HEALTHMON_SHUTDOWN, + + /* metadata health events */ + XFS_HEALTHMON_SICK, /* runtime corruption observed */ + XFS_HEALTHMON_CORRUPT, /* fsck reported corruption */ + XFS_HEALTHMON_HEALTHY, /* fsck reported healthy structure */ + + /* media errors */ + XFS_HEALTHMON_MEDIA_ERROR, + + /* file range events */ + XFS_HEALTHMON_BUFREAD, + XFS_HEALTHMON_BUFWRITE, + XFS_HEALTHMON_DIOREAD, + XFS_HEALTHMON_DIOWRITE, + XFS_HEALTHMON_DATALOST, +}; + +enum xfs_healthmon_domain { + XFS_HEALTHMON_MOUNT, /* affects the whole fs */ + + /* metadata health events */ + XFS_HEALTHMON_FS, /* main filesystem metadata */ + XFS_HEALTHMON_AG, /* allocation group metadata */ + XFS_HEALTHMON_INODE, /* inode 
metadata */ + XFS_HEALTHMON_RTGROUP, /* realtime group metadata */ + + /* media errors */ + XFS_HEALTHMON_DATADEV, + XFS_HEALTHMON_RTDEV, + XFS_HEALTHMON_LOGDEV, + + /* file range events */ + XFS_HEALTHMON_FILERANGE, +}; + +struct xfs_healthmon_event { + struct xfs_healthmon_event *next; + + enum xfs_healthmon_type type; + enum xfs_healthmon_domain domain; + + uint64_t time_ns; + + union { + /* lost events */ + struct { + uint64_t lostcount; + }; + /* fs/rt metadata */ + struct { + /* XFS_SICK_* flags */ + unsigned int fsmask; + }; + /* ag/rtgroup metadata */ + struct { + /* XFS_SICK_(AG|RG)* flags */ + unsigned int grpmask; + unsigned int group; + }; + /* inode metadata */ + struct { + /* XFS_SICK_INO_* flags */ + unsigned int imask; + uint32_t gen; + xfs_ino_t ino; + }; + /* shutdown */ + struct { + unsigned int flags; + }; + /* media errors */ + struct { + xfs_daddr_t daddr; + uint64_t bbcount; + }; + /* file range events */ + struct { + xfs_ino_t fino; + loff_t fpos; + uint64_t flen; + uint32_t fgen; + int error; + }; + }; +}; + +void xfs_healthmon_report_fs(struct xfs_mount *mp, + enum xfs_healthmon_type type, unsigned int old_mask, + unsigned int new_mask); +void xfs_healthmon_report_group(struct xfs_group *xg, + enum xfs_healthmon_type type, unsigned int old_mask, + unsigned int new_mask); +void xfs_healthmon_report_inode(struct xfs_inode *ip, + enum xfs_healthmon_type type, unsigned int old_mask, + unsigned int new_mask); + +void xfs_healthmon_report_shutdown(struct xfs_mount *mp, uint32_t flags); + +void xfs_healthmon_report_media(struct xfs_mount *mp, enum xfs_device fdev, + xfs_daddr_t daddr, uint64_t bbcount); + +void xfs_healthmon_report_file_ioerror(struct xfs_inode *ip, + const struct fserror_event *p); + +long xfs_ioc_health_monitor(struct file *file, + struct xfs_health_monitor __user *arg); + +#endif /* __XFS_HEALTHMON_H__ */ diff --git a/fs/xfs/xfs_hooks.c b/fs/xfs/xfs_hooks.c index a58d1de2d37d..a09109e692b1 100644 --- a/fs/xfs/xfs_hooks.c +++ 
b/fs/xfs/xfs_hooks.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 23a920437fe4..dbaab4ae709f 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index f83ec2bd0583..95b0eba242e9 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2008-2010, 2013 Dave Chinner * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -49,13 +49,11 @@ xfs_icreate_item_size( STATIC void xfs_icreate_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_icreate_item *icp = ICR_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICREATE, - &icp->ic_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_ICREATE, &icp->ic_format, sizeof(struct xfs_icreate_log)); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index f1f88e48fe22..50c0404f9064 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -5,7 +5,7 @@ */ #include <linux/iversion.h> -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 2eb0c6011a2e..8913036b8024 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -336,8 +336,7 @@ STATIC void xfs_inode_item_format_data_fork( struct xfs_inode_log_item *iip, struct xfs_inode_log_format *ilf, - struct xfs_log_vec *lv, - struct xfs_log_iovec **vecp) + struct xlog_format_buf *lfb) { struct xfs_inode *ip = iip->ili_inode; size_t data_bytes; @@ -354,9 +353,9 @@ xfs_inode_item_format_data_fork( ASSERT(xfs_iext_count(&ip->i_df) > 0); - p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); + p = xlog_format_start(lfb, XLOG_REG_TYPE_IEXT); data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK); - xlog_finish_iovec(lv, *vecp, data_bytes); + xlog_format_commit(lfb, data_bytes); ASSERT(data_bytes <= ip->i_df.if_bytes); @@ -374,7 +373,7 @@ xfs_inode_item_format_data_fork( if ((iip->ili_fields & XFS_ILOG_DBROOT) && ip->i_df.if_broot_bytes > 0) { ASSERT(ip->i_df.if_broot != NULL); - xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT, + xlog_format_copy(lfb, XLOG_REG_TYPE_IBROOT, ip->i_df.if_broot, ip->i_df.if_broot_bytes); ilf->ilf_dsize = ip->i_df.if_broot_bytes; @@ -392,8 +391,9 @@ xfs_inode_item_format_data_fork( ip->i_df.if_bytes > 0) { ASSERT(ip->i_df.if_data != NULL); ASSERT(ip->i_disk_size > 0); - xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL, - ip->i_df.if_data, ip->i_df.if_bytes); + xlog_format_copy(lfb, XLOG_REG_TYPE_ILOCAL, + ip->i_df.if_data, + ip->i_df.if_bytes); ilf->ilf_dsize = (unsigned)ip->i_df.if_bytes; ilf->ilf_size++; } else { @@ -416,8 +416,7 @@ STATIC void xfs_inode_item_format_attr_fork( struct xfs_inode_log_item *iip, struct xfs_inode_log_format *ilf, - struct xfs_log_vec *lv, - struct xfs_log_iovec **vecp) + struct xlog_format_buf *lfb) { struct xfs_inode *ip = iip->ili_inode; size_t data_bytes; @@ -435,9 +434,9 @@ xfs_inode_item_format_attr_fork( ASSERT(xfs_iext_count(&ip->i_af) == ip->i_af.if_nextents); - p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT); + p = xlog_format_start(lfb, 
XLOG_REG_TYPE_IATTR_EXT); data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK); - xlog_finish_iovec(lv, *vecp, data_bytes); + xlog_format_commit(lfb, data_bytes); ilf->ilf_asize = data_bytes; ilf->ilf_size++; @@ -453,7 +452,7 @@ xfs_inode_item_format_attr_fork( ip->i_af.if_broot_bytes > 0) { ASSERT(ip->i_af.if_broot != NULL); - xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT, + xlog_format_copy(lfb, XLOG_REG_TYPE_IATTR_BROOT, ip->i_af.if_broot, ip->i_af.if_broot_bytes); ilf->ilf_asize = ip->i_af.if_broot_bytes; @@ -469,8 +468,9 @@ xfs_inode_item_format_attr_fork( if ((iip->ili_fields & XFS_ILOG_ADATA) && ip->i_af.if_bytes > 0) { ASSERT(ip->i_af.if_data != NULL); - xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL, - ip->i_af.if_data, ip->i_af.if_bytes); + xlog_format_copy(lfb, XLOG_REG_TYPE_IATTR_LOCAL, + ip->i_af.if_data, + ip->i_af.if_bytes); ilf->ilf_asize = (unsigned)ip->i_af.if_bytes; ilf->ilf_size++; } else { @@ -619,14 +619,13 @@ xfs_inode_to_log_dinode( static void xfs_inode_item_format_core( struct xfs_inode *ip, - struct xfs_log_vec *lv, - struct xfs_log_iovec **vecp) + struct xlog_format_buf *lfb) { struct xfs_log_dinode *dic; - dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE); + dic = xlog_format_start(lfb, XLOG_REG_TYPE_ICORE); xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn); - xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_mount)); + xlog_format_commit(lfb, xfs_log_dinode_size(ip->i_mount)); } /* @@ -644,14 +643,13 @@ xfs_inode_item_format_core( STATIC void xfs_inode_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; - struct xfs_log_iovec *vecp = NULL; struct xfs_inode_log_format *ilf; - ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT); + ilf = xlog_format_start(lfb, XLOG_REG_TYPE_IFORMAT); ilf->ilf_type = XFS_LI_INODE; ilf->ilf_ino = ip->i_ino; ilf->ilf_blkno = 
ip->i_imap.im_blkno; @@ -668,13 +666,12 @@ xfs_inode_item_format( ilf->ilf_asize = 0; ilf->ilf_pad = 0; memset(&ilf->ilf_u, 0, sizeof(ilf->ilf_u)); + xlog_format_commit(lfb, sizeof(*ilf)); - xlog_finish_iovec(lv, vecp, sizeof(*ilf)); - - xfs_inode_item_format_core(ip, lv, &vecp); - xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp); + xfs_inode_item_format_core(ip, lfb); + xfs_inode_item_format_data_fork(iip, ilf, lfb); if (xfs_inode_has_attr_fork(ip)) { - xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp); + xfs_inode_item_format_attr_fork(iip, ilf, lfb); } else { iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 9d1999d41be1..5d93228783eb 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 59eaad774371..4eeda4d4e3ab 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -41,6 +41,8 @@ #include "xfs_exchrange.h" #include "xfs_handle.h" #include "xfs_rtgroup.h" +#include "xfs_healthmon.h" +#include "xfs_verify_media.h" #include <linux/mount.h> #include <linux/fileattr.h> @@ -1419,6 +1421,11 @@ xfs_file_ioctl( case XFS_IOC_COMMIT_RANGE: return xfs_ioc_commit_range(filp, arg); + case XFS_IOC_HEALTH_MONITOR: + return xfs_ioc_health_monitor(filp, arg); + case XFS_IOC_VERIFY_MEDIA: + return xfs_ioc_verify_media(filp, arg); + default: return -ENOTTY; } diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index b64785dc4354..c66e192448a8 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -5,7 +5,7 @@ */ #include <linux/mount.h> #include <linux/fsmap.h> -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 37a1b33e9045..be86d43044df 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -4,7 +4,7 @@ * Copyright (c) 2016-2018 Christoph Hellwig. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 1cdd8a360510..208543e57eda 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 2aa37a4d2706..9faff287f747 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_iunlink_item.c b/fs/xfs/xfs_iunlink_item.c index 1fd70a7aed63..a03a48eeb9a8 100644 --- a/fs/xfs/xfs_iunlink_item.c +++ b/fs/xfs/xfs_iunlink_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2022, Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index c1c31d1a8e21..ed4033006868 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -3,7 +3,7 @@ * Copyright (C) 2019 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index d4544ccafea5..a26378ca247d 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -22,6 +22,15 @@ #include "xfs_health.h" #include "xfs_zone_alloc.h" +struct xlog_write_data { + struct xlog_ticket *ticket; + struct xlog_in_core *iclog; + uint32_t bytes_left; + uint32_t record_cnt; + uint32_t data_cnt; + int log_offset; +}; + struct kmem_cache *xfs_log_ticket_cache; /* Local miscellaneous function prototypes */ @@ -43,10 +52,7 @@ STATIC void xlog_state_do_callback( STATIC int xlog_state_get_iclog_space( struct xlog *log, - int len, - struct xlog_in_core **iclog, - struct xlog_ticket *ticket, - int *logoffsetp); + struct xlog_write_data *data); STATIC void xlog_sync( struct xlog *log, @@ -74,62 +80,6 @@ xlog_iclogs_empty( static int xfs_log_cover(struct xfs_mount *); -/* - * We need to make sure the buffer pointer returned is naturally aligned for the - * biggest basic data type we put into it. We have already accounted for this - * padding when sizing the buffer. - * - * However, this padding does not get written into the log, and hence we have to - * track the space used by the log vectors separately to prevent log space hangs - * due to inaccurate accounting (i.e. a leak) of the used log space through the - * CIL context ticket. - * - * We also add space for the xlog_op_header that describes this region in the - * log. This prepends the data region we return to the caller to copy their data - * into, so do all the static initialisation of the ophdr now. Because the ophdr - * is not 8 byte aligned, we have to be careful to ensure that we align the - * start of the buffer such that the region we return to the call is 8 byte - * aligned and packed against the tail of the ophdr. 
- */ -void * -xlog_prepare_iovec( - struct xfs_log_vec *lv, - struct xfs_log_iovec **vecp, - uint type) -{ - struct xfs_log_iovec *vec = *vecp; - struct xlog_op_header *oph; - uint32_t len; - void *buf; - - if (vec) { - ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs); - vec++; - } else { - vec = &lv->lv_iovecp[0]; - } - - len = lv->lv_buf_used + sizeof(struct xlog_op_header); - if (!IS_ALIGNED(len, sizeof(uint64_t))) { - lv->lv_buf_used = round_up(len, sizeof(uint64_t)) - - sizeof(struct xlog_op_header); - } - - vec->i_type = type; - vec->i_addr = lv->lv_buf + lv->lv_buf_used; - - oph = vec->i_addr; - oph->oh_clientid = XFS_TRANSACTION; - oph->oh_res2 = 0; - oph->oh_flags = 0; - - buf = vec->i_addr + sizeof(struct xlog_op_header); - ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t))); - - *vecp = vec; - return buf; -} - static inline void xlog_grant_sub_space( struct xlog_grant_head *head, @@ -848,6 +798,27 @@ xlog_wait_on_iclog( return 0; } +int +xlog_write_one_vec( + struct xlog *log, + struct xfs_cil_ctx *ctx, + struct xfs_log_iovec *reg, + struct xlog_ticket *ticket) +{ + struct xfs_log_vec lv = { + .lv_niovecs = 1, + .lv_iovecp = reg, + .lv_bytes = reg->i_len, + }; + LIST_HEAD (lv_chain); + + /* account for space used by record data */ + ticket->t_curr_res -= lv.lv_bytes; + + list_add(&lv.lv_list, &lv_chain); + return xlog_write(log, ctx, &lv_chain, ticket, lv.lv_bytes); +} + /* * Write out an unmount record using the ticket provided. 
We have to account for * the data space used in the unmount ticket as this write is not done from a @@ -876,21 +847,8 @@ xlog_write_unmount_record( .i_len = sizeof(unmount_rec), .i_type = XLOG_REG_TYPE_UNMOUNT, }; - struct xfs_log_vec vec = { - .lv_niovecs = 1, - .lv_iovecp = &reg, - }; - LIST_HEAD(lv_chain); - list_add(&vec.lv_list, &lv_chain); - - BUILD_BUG_ON((sizeof(struct xlog_op_header) + - sizeof(struct xfs_unmount_log_format)) != - sizeof(unmount_rec)); - /* account for space used by record data */ - ticket->t_curr_res -= sizeof(unmount_rec); - - return xlog_write(log, NULL, &lv_chain, ticket, reg.i_len); + return xlog_write_one_vec(log, NULL, &reg, ticket); } /* @@ -1922,25 +1880,36 @@ xlog_print_trans( } } +static inline uint32_t xlog_write_space_left(struct xlog_write_data *data) +{ + return data->iclog->ic_size - data->log_offset; +} + +static void * +xlog_write_space_advance( + struct xlog_write_data *data, + unsigned int len) +{ + void *p = data->iclog->ic_datap + data->log_offset; + + ASSERT(xlog_write_space_left(data) >= len); + ASSERT(data->log_offset % sizeof(int32_t) == 0); + ASSERT(len % sizeof(int32_t) == 0); + + data->data_cnt += len; + data->log_offset += len; + data->bytes_left -= len; + return p; +} + static inline void xlog_write_iovec( - struct xlog_in_core *iclog, - uint32_t *log_offset, - void *data, - uint32_t write_len, - int *bytes_left, - uint32_t *record_cnt, - uint32_t *data_cnt) + struct xlog_write_data *data, + void *buf, + uint32_t buf_len) { - ASSERT(*log_offset < iclog->ic_log->l_iclog_size); - ASSERT(*log_offset % sizeof(int32_t) == 0); - ASSERT(write_len % sizeof(int32_t) == 0); - - memcpy(iclog->ic_datap + *log_offset, data, write_len); - *log_offset += write_len; - *bytes_left -= write_len; - (*record_cnt)++; - *data_cnt += write_len; + memcpy(xlog_write_space_advance(data, buf_len), buf, buf_len); + data->record_cnt++; } /* @@ -1950,17 +1919,12 @@ xlog_write_iovec( static void xlog_write_full( struct xfs_log_vec *lv, - struct 
xlog_ticket *ticket, - struct xlog_in_core *iclog, - uint32_t *log_offset, - uint32_t *len, - uint32_t *record_cnt, - uint32_t *data_cnt) + struct xlog_write_data *data) { int index; - ASSERT(*log_offset + *len <= iclog->ic_size || - iclog->ic_state == XLOG_STATE_WANT_SYNC); + ASSERT(data->bytes_left <= xlog_write_space_left(data) || + data->iclog->ic_state == XLOG_STATE_WANT_SYNC); /* * Ordered log vectors have no regions to write so this @@ -1970,40 +1934,32 @@ xlog_write_full( struct xfs_log_iovec *reg = &lv->lv_iovecp[index]; struct xlog_op_header *ophdr = reg->i_addr; - ophdr->oh_tid = cpu_to_be32(ticket->t_tid); - xlog_write_iovec(iclog, log_offset, reg->i_addr, - reg->i_len, len, record_cnt, data_cnt); + ophdr->oh_tid = cpu_to_be32(data->ticket->t_tid); + xlog_write_iovec(data, reg->i_addr, reg->i_len); } } static int xlog_write_get_more_iclog_space( - struct xlog_ticket *ticket, - struct xlog_in_core **iclogp, - uint32_t *log_offset, - uint32_t len, - uint32_t *record_cnt, - uint32_t *data_cnt) + struct xlog_write_data *data) { - struct xlog_in_core *iclog = *iclogp; - struct xlog *log = iclog->ic_log; + struct xlog *log = data->iclog->ic_log; int error; spin_lock(&log->l_icloglock); - ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC); - xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); - error = xlog_state_release_iclog(log, iclog, ticket); + ASSERT(data->iclog->ic_state == XLOG_STATE_WANT_SYNC); + xlog_state_finish_copy(log, data->iclog, data->record_cnt, + data->data_cnt); + error = xlog_state_release_iclog(log, data->iclog, data->ticket); spin_unlock(&log->l_icloglock); if (error) return error; - error = xlog_state_get_iclog_space(log, len, &iclog, ticket, - log_offset); + error = xlog_state_get_iclog_space(log, data); if (error) return error; - *record_cnt = 0; - *data_cnt = 0; - *iclogp = iclog; + data->record_cnt = 0; + data->data_cnt = 0; return 0; } @@ -2016,14 +1972,8 @@ xlog_write_get_more_iclog_space( static int xlog_write_partial( 
struct xfs_log_vec *lv, - struct xlog_ticket *ticket, - struct xlog_in_core **iclogp, - uint32_t *log_offset, - uint32_t *len, - uint32_t *record_cnt, - uint32_t *data_cnt) + struct xlog_write_data *data) { - struct xlog_in_core *iclog = *iclogp; struct xlog_op_header *ophdr; int index = 0; uint32_t rlen; @@ -2045,25 +1995,22 @@ xlog_write_partial( * Hence if there isn't space for region data after the * opheader, then we need to start afresh with a new iclog. */ - if (iclog->ic_size - *log_offset <= + if (xlog_write_space_left(data) <= sizeof(struct xlog_op_header)) { - error = xlog_write_get_more_iclog_space(ticket, - &iclog, log_offset, *len, record_cnt, - data_cnt); + error = xlog_write_get_more_iclog_space(data); if (error) return error; } ophdr = reg->i_addr; - rlen = min_t(uint32_t, reg->i_len, iclog->ic_size - *log_offset); + rlen = min_t(uint32_t, reg->i_len, xlog_write_space_left(data)); - ophdr->oh_tid = cpu_to_be32(ticket->t_tid); + ophdr->oh_tid = cpu_to_be32(data->ticket->t_tid); ophdr->oh_len = cpu_to_be32(rlen - sizeof(struct xlog_op_header)); if (rlen != reg->i_len) ophdr->oh_flags |= XLOG_CONTINUE_TRANS; - xlog_write_iovec(iclog, log_offset, reg->i_addr, - rlen, len, record_cnt, data_cnt); + xlog_write_iovec(data, reg->i_addr, rlen); /* If we wrote the whole region, move to the next. */ if (rlen == reg->i_len) @@ -2098,22 +2045,20 @@ xlog_write_partial( * consumes hasn't been accounted to the lv we are * writing. 
*/ - error = xlog_write_get_more_iclog_space(ticket, - &iclog, log_offset, - *len + sizeof(struct xlog_op_header), - record_cnt, data_cnt); + data->bytes_left += sizeof(struct xlog_op_header); + error = xlog_write_get_more_iclog_space(data); if (error) return error; - ophdr = iclog->ic_datap + *log_offset; - ophdr->oh_tid = cpu_to_be32(ticket->t_tid); + ophdr = xlog_write_space_advance(data, + sizeof(struct xlog_op_header)); + ophdr->oh_tid = cpu_to_be32(data->ticket->t_tid); ophdr->oh_clientid = XFS_TRANSACTION; ophdr->oh_res2 = 0; ophdr->oh_flags = XLOG_WAS_CONT_TRANS; - ticket->t_curr_res -= sizeof(struct xlog_op_header); - *log_offset += sizeof(struct xlog_op_header); - *data_cnt += sizeof(struct xlog_op_header); + data->ticket->t_curr_res -= + sizeof(struct xlog_op_header); /* * If rlen fits in the iclog, then end the region @@ -2121,26 +2066,19 @@ xlog_write_partial( */ reg_offset += rlen; rlen = reg->i_len - reg_offset; - if (rlen <= iclog->ic_size - *log_offset) + if (rlen <= xlog_write_space_left(data)) ophdr->oh_flags |= XLOG_END_TRANS; else ophdr->oh_flags |= XLOG_CONTINUE_TRANS; - rlen = min_t(uint32_t, rlen, iclog->ic_size - *log_offset); + rlen = min_t(uint32_t, rlen, + xlog_write_space_left(data)); ophdr->oh_len = cpu_to_be32(rlen); - xlog_write_iovec(iclog, log_offset, - reg->i_addr + reg_offset, - rlen, len, record_cnt, data_cnt); - + xlog_write_iovec(data, reg->i_addr + reg_offset, rlen); } while (ophdr->oh_flags & XLOG_CONTINUE_TRANS); } - /* - * No more iovecs remain in this logvec so return the next log vec to - * the caller so it can go back to fast path copying. 
- */ - *iclogp = iclog; return 0; } @@ -2193,12 +2131,12 @@ xlog_write( uint32_t len) { - struct xlog_in_core *iclog = NULL; struct xfs_log_vec *lv; - uint32_t record_cnt = 0; - uint32_t data_cnt = 0; - int error = 0; - int log_offset; + struct xlog_write_data data = { + .ticket = ticket, + .bytes_left = len, + }; + int error; if (ticket->t_curr_res < 0) { xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, @@ -2207,12 +2145,11 @@ xlog_write( xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); } - error = xlog_state_get_iclog_space(log, len, &iclog, ticket, - &log_offset); + error = xlog_state_get_iclog_space(log, &data); if (error) return error; - ASSERT(log_offset <= iclog->ic_size - 1); + ASSERT(xlog_write_space_left(&data) > 0); /* * If we have a context pointer, pass it the first iclog we are @@ -2220,7 +2157,7 @@ xlog_write( * ordering. */ if (ctx) - xlog_cil_set_ctx_write_state(ctx, iclog); + xlog_cil_set_ctx_write_state(ctx, data.iclog); list_for_each_entry(lv, lv_chain, lv_list) { /* @@ -2228,10 +2165,8 @@ xlog_write( * the partial copy loop which can handle this case. */ if (lv->lv_niovecs && - lv->lv_bytes > iclog->ic_size - log_offset) { - error = xlog_write_partial(lv, ticket, &iclog, - &log_offset, &len, &record_cnt, - &data_cnt); + lv->lv_bytes > xlog_write_space_left(&data)) { + error = xlog_write_partial(lv, &data); if (error) { /* * We have no iclog to release, so just return @@ -2240,11 +2175,10 @@ xlog_write( return error; } } else { - xlog_write_full(lv, ticket, iclog, &log_offset, - &len, &record_cnt, &data_cnt); + xlog_write_full(lv, &data); } } - ASSERT(len == 0); + ASSERT(data.bytes_left == 0); /* * We've already been guaranteed that the last writes will fit inside @@ -2253,8 +2187,8 @@ xlog_write( * iclog with the number of bytes written here. 
*/ spin_lock(&log->l_icloglock); - xlog_state_finish_copy(log, iclog, record_cnt, 0); - error = xlog_state_release_iclog(log, iclog, ticket); + xlog_state_finish_copy(log, data.iclog, data.record_cnt, 0); + error = xlog_state_release_iclog(log, data.iclog, ticket); spin_unlock(&log->l_icloglock); return error; @@ -2576,10 +2510,7 @@ xlog_state_done_syncing( STATIC int xlog_state_get_iclog_space( struct xlog *log, - int len, - struct xlog_in_core **iclogp, - struct xlog_ticket *ticket, - int *logoffsetp) + struct xlog_write_data *data) { int log_offset; struct xlog_rec_header *head; @@ -2614,7 +2545,7 @@ restart: * must be written. */ if (log_offset == 0) { - ticket->t_curr_res -= log->l_iclog_hsize; + data->ticket->t_curr_res -= log->l_iclog_hsize; head->h_cycle = cpu_to_be32(log->l_curr_cycle); head->h_lsn = cpu_to_be64( xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block)); @@ -2644,7 +2575,8 @@ restart: * reference to the iclog. */ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) - error = xlog_state_release_iclog(log, iclog, ticket); + error = xlog_state_release_iclog(log, iclog, + data->ticket); spin_unlock(&log->l_icloglock); if (error) return error; @@ -2657,16 +2589,16 @@ restart: * iclogs (to mark it taken), this particular iclog will release/sync * to disk in xlog_write(). 
*/ - if (len <= iclog->ic_size - iclog->ic_offset) - iclog->ic_offset += len; + if (data->bytes_left <= iclog->ic_size - iclog->ic_offset) + iclog->ic_offset += data->bytes_left; else xlog_state_switch_iclogs(log, iclog, iclog->ic_size); - *iclogp = iclog; + data->iclog = iclog; ASSERT(iclog->ic_offset <= iclog->ic_size); spin_unlock(&log->l_icloglock); - *logoffsetp = log_offset; + data->log_offset = log_offset; return 0; } diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index dcc1f44ed68f..0f23812b0b31 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -6,20 +6,9 @@ #ifndef __XFS_LOG_H__ #define __XFS_LOG_H__ +struct xlog_format_buf; struct xfs_cil_ctx; -struct xfs_log_vec { - struct list_head lv_list; /* CIL lv chain ptrs */ - uint32_t lv_order_id; /* chain ordering info */ - int lv_niovecs; /* number of iovecs in lv */ - struct xfs_log_iovec *lv_iovecp; /* iovec array */ - struct xfs_log_item *lv_item; /* owner */ - char *lv_buf; /* formatted buffer */ - int lv_bytes; /* accounted space in buffer */ - int lv_buf_used; /* buffer space used so far */ - int lv_alloc_size; /* size of allocated lv */ -}; - /* Region types for iovec's i_type */ #define XLOG_REG_TYPE_BFORMAT 1 #define XLOG_REG_TYPE_BCHUNK 2 @@ -70,58 +59,24 @@ xlog_calc_iovec_len(int len) return roundup(len, sizeof(uint32_t)); } -void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, - uint type); - -static inline void -xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, - int data_len) -{ - struct xlog_op_header *oph = vec->i_addr; - int len; - - /* - * Always round up the length to the correct alignment so callers don't - * need to know anything about this log vec layout requirement. This - * means we have to zero the area the data to be written does not cover. - * This is complicated by fact the payload region is offset into the - * logvec region by the opheader that tracks the payload. 
- */ - len = xlog_calc_iovec_len(data_len); - if (len - data_len != 0) { - char *buf = vec->i_addr + sizeof(struct xlog_op_header); - - memset(buf + data_len, 0, len - data_len); - } - - /* - * The opheader tracks aligned payload length, whilst the logvec tracks - * the overall region length. - */ - oph->oh_len = cpu_to_be32(len); - - len += sizeof(struct xlog_op_header); - lv->lv_buf_used += len; - lv->lv_bytes += len; - vec->i_len = len; - - /* Catch buffer overruns */ - ASSERT((void *)lv->lv_buf + lv->lv_bytes <= - (void *)lv + lv->lv_alloc_size); -} +void *xlog_format_start(struct xlog_format_buf *lfb, uint16_t type); +void xlog_format_commit(struct xlog_format_buf *lfb, unsigned int data_len); /* * Copy the amount of data requested by the caller into a new log iovec. */ static inline void * -xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, - uint type, void *data, int len) +xlog_format_copy( + struct xlog_format_buf *lfb, + uint16_t type, + void *data, + unsigned int len) { void *buf; - buf = xlog_prepare_iovec(lv, vecp, type); + buf = xlog_format_start(lfb, type); memcpy(buf, data, len); - xlog_finish_iovec(lv, *vecp, len); + xlog_format_commit(lfb, len); return buf; } diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 778ac47adb8c..566976b8fef3 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -3,7 +3,7 @@ * Copyright (c) 2010 Red Hat, Inc. All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" @@ -409,6 +409,102 @@ xfs_cil_prepare_item( lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence; } +struct xlog_format_buf { + struct xfs_log_vec *lv; + unsigned int idx; +}; + +/* + * We need to make sure the buffer pointer returned is naturally aligned for the + * biggest basic data type we put into it. We have already accounted for this + * padding when sizing the buffer. 
+ * + * However, this padding does not get written into the log, and hence we have to + * track the space used by the log vectors separately to prevent log space hangs + * due to inaccurate accounting (i.e. a leak) of the used log space through the + * CIL context ticket. + * + * We also add space for the xlog_op_header that describes this region in the + * log. This prepends the data region we return to the caller to copy their data + * into, so do all the static initialisation of the ophdr now. Because the ophdr + * is not 8 byte aligned, we have to be careful to ensure that we align the + * start of the buffer such that the region we return to the call is 8 byte + * aligned and packed against the tail of the ophdr. + */ +void * +xlog_format_start( + struct xlog_format_buf *lfb, + uint16_t type) +{ + struct xfs_log_vec *lv = lfb->lv; + struct xfs_log_iovec *vec = &lv->lv_iovecp[lfb->idx]; + struct xlog_op_header *oph; + uint32_t len; + void *buf; + + ASSERT(lfb->idx < lv->lv_niovecs); + + len = lv->lv_buf_used + sizeof(struct xlog_op_header); + if (!IS_ALIGNED(len, sizeof(uint64_t))) { + lv->lv_buf_used = round_up(len, sizeof(uint64_t)) - + sizeof(struct xlog_op_header); + } + + vec->i_type = type; + vec->i_addr = lv->lv_buf + lv->lv_buf_used; + + oph = vec->i_addr; + oph->oh_clientid = XFS_TRANSACTION; + oph->oh_res2 = 0; + oph->oh_flags = 0; + + buf = vec->i_addr + sizeof(struct xlog_op_header); + ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t))); + return buf; +} + +void +xlog_format_commit( + struct xlog_format_buf *lfb, + unsigned int data_len) +{ + struct xfs_log_vec *lv = lfb->lv; + struct xfs_log_iovec *vec = &lv->lv_iovecp[lfb->idx]; + struct xlog_op_header *oph = vec->i_addr; + int len; + + /* + * Always round up the length to the correct alignment so callers don't + * need to know anything about this log vec layout requirement. This + * means we have to zero the area the data to be written does not cover. 
+ * This is complicated by fact the payload region is offset into the + * logvec region by the opheader that tracks the payload. + */ + len = xlog_calc_iovec_len(data_len); + if (len - data_len != 0) { + char *buf = vec->i_addr + sizeof(struct xlog_op_header); + + memset(buf + data_len, 0, len - data_len); + } + + /* + * The opheader tracks aligned payload length, whilst the logvec tracks + * the overall region length. + */ + oph->oh_len = cpu_to_be32(len); + + len += sizeof(struct xlog_op_header); + lv->lv_buf_used += len; + lv->lv_bytes += len; + vec->i_len = len; + + /* Catch buffer overruns */ + ASSERT((void *)lv->lv_buf + lv->lv_bytes <= + (void *)lv + lv->lv_alloc_size); + + lfb->idx++; +} + /* * Format log item into a flat buffers * @@ -454,6 +550,7 @@ xlog_cil_insert_format_items( list_for_each_entry(lip, &tp->t_items, li_trans) { struct xfs_log_vec *lv = lip->li_lv; struct xfs_log_vec *shadow = lip->li_lv_shadow; + struct xlog_format_buf lfb = { }; /* Skip items which aren't dirty in this transaction. 
*/ if (!test_bit(XFS_LI_DIRTY, &lip->li_flags)) @@ -501,8 +598,9 @@ xlog_cil_insert_format_items( lv->lv_item = lip; } + lfb.lv = lv; ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t))); - lip->li_ops->iop_format(lip, lv); + lip->li_ops->iop_format(lip, &lfb); xfs_cil_prepare_item(log, lip, lv, diff_len); } } @@ -1098,13 +1196,7 @@ xlog_cil_write_commit_record( .i_len = sizeof(struct xlog_op_header), .i_type = XLOG_REG_TYPE_COMMIT, }; - struct xfs_log_vec vec = { - .lv_niovecs = 1, - .lv_iovecp = &reg, - }; int error; - LIST_HEAD(lv_chain); - list_add(&vec.lv_list, &lv_chain); if (xlog_is_shutdown(log)) return -EIO; @@ -1112,10 +1204,7 @@ xlog_cil_write_commit_record( error = xlog_cil_order_write(ctx->cil, ctx->sequence, _COMMIT_RECORD); if (error) return error; - - /* account for space used by record data */ - ctx->ticket->t_curr_res -= reg.i_len; - error = xlog_write(log, ctx, &lv_chain, ctx->ticket, reg.i_len); + error = xlog_write_one_vec(log, ctx, &reg, ctx->ticket); if (error) xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); return error; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 0fe59f0525aa..cf1e4ce61a8c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -13,6 +13,24 @@ struct xlog; struct xlog_ticket; struct xfs_mount; +struct xfs_log_iovec { + void *i_addr;/* beginning address of region */ + int i_len; /* length in bytes of region */ + uint i_type; /* type of region */ +}; + +struct xfs_log_vec { + struct list_head lv_list; /* CIL lv chain ptrs */ + uint32_t lv_order_id; /* chain ordering info */ + int lv_niovecs; /* number of iovecs in lv */ + struct xfs_log_iovec *lv_iovecp; /* iovec array */ + struct xfs_log_item *lv_item; /* owner */ + char *lv_buf; /* formatted buffer */ + int lv_bytes; /* accounted space in buffer */ + int lv_buf_used; /* buffer space used so far */ + int lv_alloc_size; /* size of allocated lv */ +}; + /* * get client id from packed copy.
* @@ -507,6 +525,8 @@ void xlog_print_trans(struct xfs_trans *); int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx, struct list_head *lv_chain, struct xlog_ticket *tic, uint32_t len); +int xlog_write_one_vec(struct xlog *log, struct xfs_cil_ctx *ctx, + struct xfs_log_iovec *reg, struct xlog_ticket *ticket); void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 03e42c7dab56..935905743f94 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -2953,18 +2953,23 @@ xlog_valid_rec_header( xfs_daddr_t blkno, int bufsize) { + struct xfs_mount *mp = log->l_mp; + u32 h_version = be32_to_cpu(rhead->h_version); int hlen; - if (XFS_IS_CORRUPT(log->l_mp, + if (XFS_IS_CORRUPT(mp, rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) return -EFSCORRUPTED; - if (XFS_IS_CORRUPT(log->l_mp, - (!rhead->h_version || - (be32_to_cpu(rhead->h_version) & - (~XLOG_VERSION_OKBITS))))) { - xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", - __func__, be32_to_cpu(rhead->h_version)); - return -EFSCORRUPTED; + + /* + * The log version must match the superblock + */ + if (xfs_has_logv2(mp)) { + if (XFS_IS_CORRUPT(mp, h_version != XLOG_VERSION_2)) + return -EFSCORRUPTED; + } else { + if (XFS_IS_CORRUPT(mp, h_version != XLOG_VERSION_1)) + return -EFSCORRUPTED; } /* @@ -2972,12 +2977,12 @@ xlog_valid_rec_header( * and h_len must not be greater than LR buffer size. 
*/ hlen = be32_to_cpu(rhead->h_len); - if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > bufsize)) + if (XFS_IS_CORRUPT(mp, hlen <= 0 || hlen > bufsize)) return -EFSCORRUPTED; - if (XFS_IS_CORRUPT(log->l_mp, - blkno > log->l_logBBsize || blkno > INT_MAX)) + if (XFS_IS_CORRUPT(mp, blkno > log->l_logBBsize || blkno > INT_MAX)) return -EFSCORRUPTED; + return 0; } diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 19aba2c3d525..fd297082aeb8 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -3,7 +3,7 @@ * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_error.h" #include "xfs_shared.h" @@ -149,14 +149,6 @@ xfs_warn_experimental( .opstate = XFS_OPSTATE_WARNED_LARP, .name = "logged extended attributes", }, - [XFS_EXPERIMENTAL_LBS] = { - .opstate = XFS_OPSTATE_WARNED_LBS, - .name = "large block size", - }, - [XFS_EXPERIMENTAL_METADIR] = { - .opstate = XFS_OPSTATE_WARNED_METADIR, - .name = "metadata directory tree", - }, [XFS_EXPERIMENTAL_ZONED] = { .opstate = XFS_OPSTATE_WARNED_ZONED, .name = "zoned RT device", diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h index d68e72379f9d..49b0ef40d299 100644 --- a/fs/xfs/xfs_message.h +++ b/fs/xfs/xfs_message.h @@ -93,8 +93,6 @@ void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg, enum xfs_experimental_feat { XFS_EXPERIMENTAL_SHRINK, XFS_EXPERIMENTAL_LARP, - XFS_EXPERIMENTAL_LBS, - XFS_EXPERIMENTAL_METADIR, XFS_EXPERIMENTAL_ZONED, XFS_EXPERIMENTAL_MAX, diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 0953f6ae94ab..9c295abd0a0a 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -41,6 +41,7 @@ #include "xfs_rtrefcount_btree.h" #include "scrub/stats.h" #include "xfs_zone_alloc.h" +#include "xfs_healthmon.h" static DEFINE_MUTEX(xfs_uuid_table_mutex); static int xfs_uuid_table_size; @@ -625,6 +626,7 @@ xfs_unmount_flush_inodes( cancel_delayed_work_sync(&mp->m_reclaim_work); xfs_reclaim_inodes(mp); xfs_health_unmount(mp); + xfs_healthmon_unmount(mp); } static void diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b871dfde372b..61c71128d171 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -13,6 +13,7 @@ struct xfs_ail; struct xfs_quotainfo; struct xfs_da_geometry; struct xfs_perag; +struct xfs_healthmon; /* dynamic preallocation free space thresholds, 5% down to 1% */ enum { @@ -342,6 +343,9 @@ typedef struct xfs_mount { /* Hook to feed dirent updates to an active online repair. */ struct xfs_hooks m_dir_update_hooks; + + /* Private data referring to a health monitor object. */ + struct xfs_healthmon *m_healthmon; } xfs_mount_t; #define M_IGEO(mp) (&(mp)->m_ino_geo) diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 73b7e72944e4..4e417747688f 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -3,7 +3,7 @@ * Copyright (c) 2006-2007 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_mru_cache.h" /* diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index 6d5002413c2c..6be19fa1ebe2 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -3,7 +3,7 @@ * Copyright (c) 2022 Fujitsu. All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" @@ -22,6 +22,7 @@ #include "xfs_notify_failure.h" #include "xfs_rtgroup.h" #include "xfs_rtrmap_btree.h" +#include "xfs_healthmon.h" #include <linux/mm.h> #include <linux/dax.h> @@ -219,6 +220,8 @@ xfs_dax_notify_logdev_failure( if (error) return error; + xfs_healthmon_report_media(mp, XFS_DEV_LOG, daddr, bblen); + /* * In the pre-remove case the failure notification is attempting to * trigger a force unmount. The expectation is that the device is @@ -252,16 +255,20 @@ xfs_dax_notify_dev_failure( uint64_t bblen; struct xfs_group *xg = NULL; - if (!xfs_has_rmapbt(mp)) { - xfs_debug(mp, "notify_failure() needs rmapbt enabled!"); - return -EOPNOTSUPP; - } - error = xfs_dax_translate_range(xfs_group_type_buftarg(mp, type), offset, len, &daddr, &bblen); if (error) return error; + xfs_healthmon_report_media(mp, + type == XG_TYPE_RTG ? XFS_DEV_RT : XFS_DEV_DATA, + daddr, bblen); + + if (!xfs_has_rmapbt(mp)) { + xfs_debug(mp, "notify_failure() needs rmapbt enabled!"); + return -EOPNOTSUPP; + } + if (type == XG_TYPE_RTG) { start_bno = xfs_daddr_to_rtb(mp, daddr); end_bno = xfs_daddr_to_rtb(mp, daddr + bblen - 1); diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_platform.h index 55064228c4d5..1e59bf94d1f2 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_platform.h @@ -3,24 +3,11 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#ifndef __XFS_LINUX__ -#define __XFS_LINUX__ +#ifndef _XFS_PLATFORM_H +#define _XFS_PLATFORM_H #include <linux/types.h> #include <linux/uuid.h> - -/* - * Kernel specific type declarations for XFS - */ - -typedef __s64 xfs_off_t; /* <file offset> type */ -typedef unsigned long long xfs_ino_t; /* <inode> type */ -typedef __s64 xfs_daddr_t; /* <disk address> type */ -typedef __u32 xfs_dev_t; -typedef __u32 xfs_nlink_t; - -#include "xfs_types.h" - #include <linux/semaphore.h> #include <linux/mm.h> #include <linux/sched/mm.h> @@ -63,7 +50,6 @@ typedef __u32 xfs_nlink_t; #include <linux/xattr.h> #include <linux/mnt_idmapping.h> #include <linux/debugfs.h> - #include <asm/page.h> #include <asm/div64.h> #include <asm/param.h> @@ -71,6 +57,32 @@ typedef __u32 xfs_nlink_t; #include <asm/byteorder.h> #include <linux/unaligned.h> +#ifdef CONFIG_XFS_DEBUG +#define DEBUG 1 +#endif + +#ifdef CONFIG_XFS_DEBUG_EXPENSIVE +#define DEBUG_EXPENSIVE 1 +#endif + +#ifdef CONFIG_XFS_ASSERT_FATAL +#define XFS_ASSERT_FATAL 1 +#endif + +#ifdef CONFIG_XFS_WARN +#define XFS_WARN 1 +#endif + +/* + * Kernel specific type declarations for XFS + */ +typedef __s64 xfs_off_t; /* <file offset> type */ +typedef unsigned long long xfs_ino_t; /* <inode> type */ +typedef __s64 xfs_daddr_t; /* <disk address> type */ +typedef __u32 xfs_dev_t; +typedef __u32 xfs_nlink_t; + +#include "xfs_types.h" #include "xfs_fs.h" #include "xfs_stats.h" #include "xfs_sysctl.h" @@ -279,4 +291,4 @@ kmem_to_page(void *addr) return virt_to_page(addr); } -#endif /* __XFS_LINUX__ */ +#endif /* _XFS_PLATFORM_H */ diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index afe7497012d4..221e55887a2a 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2014 Christoph Hellwig. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c index c283b801cc5d..7c79ab0db0e2 100644 --- a/fs/xfs/xfs_pwork.c +++ b/fs/xfs/xfs_pwork.c @@ -3,7 +3,7 @@ * Copyright (C) 2019 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 95be67ac6eb4..a3e7d4a107d4 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index edc0aef3cf34..a094b8252ffd 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 022e2179c06b..d50b7318cb5c 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -5,7 +5,7 @@ */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 94fbe3d99ec7..8804508cc2b8 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -3,7 +3,7 @@ * Copyright (c) 2008, Christoph Hellwig * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 3728234699a2..881c3f3a6a24 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" @@ -93,10 +93,9 @@ unsigned int xfs_cui_log_space(unsigned int nr) STATIC void xfs_cui_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_cui_log_item *cuip = CUI_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; ASSERT(atomic_read(&cuip->cui_next_extent) == cuip->cui_format.cui_nextents); @@ -105,7 +104,7 @@ xfs_cui_item_format( cuip->cui_format.cui_type = lip->li_type; cuip->cui_format.cui_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format, xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents)); } @@ -199,17 +198,16 @@ unsigned int xfs_cud_log_space(void) STATIC void xfs_cud_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_cud_log_item *cudp = CUD_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; ASSERT(lip->li_type == XFS_LI_CUD || lip->li_type == XFS_LI_CUD_RT); cudp->cud_format.cud_type = lip->li_type; cudp->cud_format.cud_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format, sizeof(struct xfs_cud_log_format)); } diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 3f177b4ec131..db23a0f231d6 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. 
All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 15f0903f6fd4..a39fe08dcd8f 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" @@ -92,10 +92,9 @@ unsigned int xfs_rui_log_space(unsigned int nr) STATIC void xfs_rui_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_rui_log_item *ruip = RUI_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; ASSERT(atomic_read(&ruip->rui_next_extent) == ruip->rui_format.rui_nextents); @@ -105,7 +104,7 @@ xfs_rui_item_format( ruip->rui_format.rui_type = lip->li_type; ruip->rui_format.rui_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format, xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents)); } @@ -200,17 +199,16 @@ unsigned int xfs_rud_log_space(void) STATIC void xfs_rud_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_rud_log_item *rudp = RUD_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; ASSERT(lip->li_type == XFS_LI_RUD || lip->li_type == XFS_LI_RUD_RT); rudp->rud_format.rud_type = lip->li_type; rudp->rud_format.rud_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format, sizeof(struct xfs_rud_log_format)); } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index a12ffed12391..90a94a5b6f7e 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -3,7 +3,7 
@@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index 35c7fb3ba324..017db0361cd8 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" struct xstats xfsstats; @@ -23,7 +23,8 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) uint64_t xs_xstrat_bytes = 0; uint64_t xs_write_bytes = 0; uint64_t xs_read_bytes = 0; - uint64_t defer_relog = 0; + uint64_t xs_defer_relog = 0; + uint64_t xs_gc_bytes = 0; static const struct xstats_entry { char *desc; @@ -57,7 +58,8 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) { "rtrmapbt_mem", xfsstats_offset(xs_rtrefcbt_2) }, { "rtrefcntbt", xfsstats_offset(xs_qm_dqreclaims)}, /* we print both series of quota information together */ - { "qm", xfsstats_offset(xs_xstrat_bytes)}, + { "qm", xfsstats_offset(xs_gc_read_calls)}, + { "zoned", xfsstats_offset(__pad1)}, }; /* Loop over all stats groups */ @@ -76,19 +78,21 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) xs_xstrat_bytes += per_cpu_ptr(stats, i)->s.xs_xstrat_bytes; xs_write_bytes += per_cpu_ptr(stats, i)->s.xs_write_bytes; xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes; - defer_relog += per_cpu_ptr(stats, i)->s.defer_relog; + xs_defer_relog += per_cpu_ptr(stats, i)->s.xs_defer_relog; + xs_gc_bytes += per_cpu_ptr(stats, i)->s.xs_gc_bytes; } len += scnprintf(buf + len, PATH_MAX-len, "xpc %llu %llu %llu\n", xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); len += scnprintf(buf + len, PATH_MAX-len, "defer_relog %llu\n", - defer_relog); + xs_defer_relog); len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n", #if defined(DEBUG) 1); #else 0); #endif + len += scnprintf(buf + len, 
PATH_MAX-len, "gc xpc %llu\n", xs_gc_bytes); return len; } diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index 15ba1abcf253..153d2381d0a8 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -138,11 +138,17 @@ struct __xfsstats { uint32_t xs_qm_dqwants; uint32_t xs_qm_dquot; uint32_t xs_qm_dquot_unused; +/* Zone GC counters */ + uint32_t xs_gc_read_calls; + uint32_t xs_gc_write_calls; + uint32_t xs_gc_zone_reset_calls; + uint32_t __pad1; /* Extra precision counters */ uint64_t xs_xstrat_bytes; uint64_t xs_write_bytes; uint64_t xs_read_bytes; - uint64_t defer_relog; + uint64_t xs_defer_relog; + uint64_t xs_gc_bytes; }; #define xfsstats_offset(f) (offsetof(struct __xfsstats, f)/sizeof(uint32_t)) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 094f257eff15..76867eb3f975 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -4,7 +4,7 @@ * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" @@ -40,6 +40,8 @@ #include "xfs_defer.h" #include "xfs_attr_item.h" #include "xfs_xattr.h" +#include "xfs_error.h" +#include "xfs_errortag.h" #include "xfs_iunlink_item.h" #include "xfs_dahash_test.h" #include "xfs_rtbitmap.h" @@ -47,12 +49,14 @@ #include "xfs_parent.h" #include "xfs_rtalloc.h" #include "xfs_zone_alloc.h" +#include "xfs_healthmon.h" #include "scrub/stats.h" #include "scrub/rcbag_btree.h" #include <linux/magic.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> +#include <linux/fserror.h> static const struct super_operations xfs_super_operations; @@ -111,7 +115,7 @@ enum { Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota, Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce, Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones, - Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write, + Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write, Opt_errortag, }; #define fsparam_dead(NAME) \ @@ -170,6 +174,7 @@ static 
const struct fs_parameter_spec xfs_fs_parameters[] = { fsparam_flag("lifetime", Opt_lifetime), fsparam_flag("nolifetime", Opt_nolifetime), fsparam_string("max_atomic_write", Opt_max_atomic_write), + fsparam_string("errortag", Opt_errortag), {} }; @@ -794,6 +799,9 @@ xfs_mount_free( debugfs_remove(mp->m_debugfs); kfree(mp->m_rtname); kfree(mp->m_logname); +#ifdef DEBUG + kfree(mp->m_errortag); +#endif kfree(mp); } @@ -1273,6 +1281,15 @@ xfs_fs_show_stats( return 0; } +static void +xfs_fs_report_error( + const struct fserror_event *event) +{ + /* healthmon already knows about non-inode and metadata errors */ + if (event->inode && event->type != FSERR_METADATA) + xfs_healthmon_report_file_ioerror(XFS_I(event->inode), event); +} + static const struct super_operations xfs_super_operations = { .alloc_inode = xfs_fs_alloc_inode, .destroy_inode = xfs_fs_destroy_inode, @@ -1288,6 +1305,7 @@ static const struct super_operations xfs_super_operations = { .free_cached_objects = xfs_fs_free_cached_objects, .shutdown = xfs_fs_shutdown, .show_stats = xfs_fs_show_stats, + .report_error = xfs_fs_report_error, }; static int @@ -1548,6 +1566,8 @@ xfs_fs_parse_param( return -EINVAL; } return 0; + case Opt_errortag: + return xfs_errortag_add_name(parsing_mp, param->string); default: xfs_warn(parsing_mp, "unknown mount option [%s].", param->key); return -EINVAL; @@ -1806,8 +1826,6 @@ xfs_fs_fill_super( error = -ENOSYS; goto out_free_sb; } - - xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LBS); } /* Ensure this filesystem fits in the page cache limits */ @@ -1893,8 +1911,6 @@ xfs_fs_fill_super( goto out_filestream_unmount; } xfs_warn_experimental(mp, XFS_EXPERIMENTAL_ZONED); - } else if (xfs_has_metadir(mp)) { - xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR); } if (xfs_has_reflink(mp)) { @@ -2143,6 +2159,8 @@ xfs_fs_reconfigure( if (error) return error; + xfs_errortag_copy(mp, new_mp); + /* Validate new max_atomic_write option before making other changes */ if (mp->m_awu_max_bytes != 
new_mp->m_awu_max_bytes) { error = xfs_set_max_atomic_write_opt(mp, @@ -2229,6 +2247,14 @@ xfs_init_fs_context( mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); if (!mp) return -ENOMEM; +#ifdef DEBUG + mp->m_errortag = kcalloc(XFS_ERRTAG_MAX, sizeof(*mp->m_errortag), + GFP_KERNEL); + if (!mp->m_errortag) { + kfree(mp); + return -ENOMEM; + } +#endif spin_lock_init(&mp->m_sb_lock); for (i = 0; i < XG_TYPE_MAX; i++) diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 4252b07cd251..c4da624fb296 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -4,7 +4,7 @@ * Copyright (c) 2012-2013 Red Hat, Inc. * All rights reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_fs.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 9918f14b4874..7f32d282dc88 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -3,7 +3,7 @@ * Copyright (c) 2001-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_error.h" static struct ctl_table_header *xfs_table_header; diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 7a5c5ef2db92..6c7909838234 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -4,7 +4,7 @@ * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index a60556dbd172..912713a8a019 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -3,7 +3,7 @@ * Copyright (c) 2009, Christoph Hellwig * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_bit.h" @@ -51,6 +51,11 @@ #include "xfs_rtgroup.h" #include "xfs_zone_alloc.h" #include "xfs_zone_priv.h" +#include "xfs_health.h" +#include "xfs_healthmon.h" +#include "xfs_notify_failure.h" +#include "xfs_file.h" +#include <linux/fserror.h> /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f70afbf3cb19..813e5a9f57eb 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -103,6 +103,9 @@ struct xfs_refcount_intent; struct xfs_metadir_update; struct xfs_rtgroup; struct xfs_open_zone; +struct xfs_healthmon_event; +struct xfs_healthmon; +struct fserror_event; #define XFS_ATTR_FILTER_FLAGS \ { XFS_ATTR_ROOT, "ROOT" }, \ @@ -2410,6 +2413,7 @@ DEFINE_ATTR_EVENT(xfs_attr_sf_addname); DEFINE_ATTR_EVENT(xfs_attr_sf_create); DEFINE_ATTR_EVENT(xfs_attr_sf_lookup); DEFINE_ATTR_EVENT(xfs_attr_sf_remove); +DEFINE_ATTR_EVENT(xfs_attr_sf_replace); DEFINE_ATTR_EVENT(xfs_attr_sf_to_leaf); DEFINE_ATTR_EVENT(xfs_attr_leaf_add); @@ -5906,6 +5910,515 @@ DEFINE_EVENT(xfs_freeblocks_resv_class, name, \ DEFINE_FREEBLOCKS_RESV_EVENT(xfs_freecounter_reserved); DEFINE_FREEBLOCKS_RESV_EVENT(xfs_freecounter_enospc); +TRACE_EVENT(xfs_healthmon_lost_event, + TP_PROTO(const struct xfs_healthmon *hm), + TP_ARGS(hm), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long long, lost_prev) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->lost_prev = hm->lost_prev_event; + ), + TP_printk("dev %d:%d lost_prev %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->lost_prev) +); + +#define XFS_HEALTHMON_FLAGS_STRINGS \ + { XFS_HEALTH_MONITOR_VERBOSE, "verbose" } +#define XFS_HEALTHMON_FMT_STRINGS \ + { XFS_HEALTH_MONITOR_FMT_V0, "v0" } + +TRACE_EVENT(xfs_healthmon_create, + TP_PROTO(dev_t dev, u64 flags, u8 format), + TP_ARGS(dev, flags, format), + TP_STRUCT__entry( + __field(dev_t, 
dev) + __field(u64, flags) + __field(u8, format) + ), + TP_fast_assign( + __entry->dev = dev; + __entry->flags = flags; + __entry->format = format; + ), + TP_printk("dev %d:%d flags %s format %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_flags(__entry->flags, "|", XFS_HEALTHMON_FLAGS_STRINGS), + __print_symbolic(__entry->format, XFS_HEALTHMON_FMT_STRINGS)) +); + +TRACE_EVENT(xfs_healthmon_copybuf, + TP_PROTO(const struct xfs_healthmon *hm, const struct iov_iter *iov), + TP_ARGS(hm, iov), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(size_t, bufsize) + __field(size_t, inpos) + __field(size_t, outpos) + __field(size_t, to_copy) + __field(size_t, iter_count) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->bufsize = hm->bufsize; + __entry->inpos = hm->bufhead; + __entry->outpos = hm->buftail; + if (hm->bufhead > hm->buftail) + __entry->to_copy = hm->bufhead - hm->buftail; + else + __entry->to_copy = 0; + __entry->iter_count = iov_iter_count(iov); + ), + TP_printk("dev %d:%d bufsize %zu in_pos %zu out_pos %zu to_copy %zu iter_count %zu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->bufsize, + __entry->inpos, + __entry->outpos, + __entry->to_copy, + __entry->iter_count) +); + +DECLARE_EVENT_CLASS(xfs_healthmon_class, + TP_PROTO(const struct xfs_healthmon *hm), + TP_ARGS(hm), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, events) + __field(unsigned long long, lost_prev) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->events = hm->events; + __entry->lost_prev = hm->lost_prev_event; + ), + TP_printk("dev %d:%d events %u lost_prev? 
%llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->events, + __entry->lost_prev) +); +#define DEFINE_HEALTHMON_EVENT(name) \ +DEFINE_EVENT(xfs_healthmon_class, name, \ + TP_PROTO(const struct xfs_healthmon *hm), \ + TP_ARGS(hm)) +DEFINE_HEALTHMON_EVENT(xfs_healthmon_read_start); +DEFINE_HEALTHMON_EVENT(xfs_healthmon_read_finish); +DEFINE_HEALTHMON_EVENT(xfs_healthmon_release); +DEFINE_HEALTHMON_EVENT(xfs_healthmon_detach); +DEFINE_HEALTHMON_EVENT(xfs_healthmon_report_unmount); + +#define XFS_HEALTHMON_TYPE_STRINGS \ + { XFS_HEALTHMON_LOST, "lost" }, \ + { XFS_HEALTHMON_UNMOUNT, "unmount" }, \ + { XFS_HEALTHMON_SICK, "sick" }, \ + { XFS_HEALTHMON_CORRUPT, "corrupt" }, \ + { XFS_HEALTHMON_HEALTHY, "healthy" }, \ + { XFS_HEALTHMON_SHUTDOWN, "shutdown" } + +#define XFS_HEALTHMON_DOMAIN_STRINGS \ + { XFS_HEALTHMON_MOUNT, "mount" }, \ + { XFS_HEALTHMON_FS, "fs" }, \ + { XFS_HEALTHMON_AG, "ag" }, \ + { XFS_HEALTHMON_INODE, "inode" }, \ + { XFS_HEALTHMON_RTGROUP, "rtgroup" } + +TRACE_DEFINE_ENUM(XFS_HEALTHMON_LOST); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_SHUTDOWN); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_UNMOUNT); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_SICK); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_CORRUPT); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_HEALTHY); + +TRACE_DEFINE_ENUM(XFS_HEALTHMON_MOUNT); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_FS); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_AG); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_INODE); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_RTGROUP); + +DECLARE_EVENT_CLASS(xfs_healthmon_event_class, + TP_PROTO(const struct xfs_healthmon *hm, + const struct xfs_healthmon_event *event), + TP_ARGS(hm, event), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(unsigned int, domain) + __field(unsigned int, mask) + __field(unsigned long long, ino) + __field(unsigned int, gen) + __field(unsigned int, group) + __field(unsigned long long, offset) + __field(unsigned long long, length) + __field(unsigned long long, lostcount) + ), + TP_fast_assign( + __entry->dev = 
hm->dev; + __entry->type = event->type; + __entry->domain = event->domain; + __entry->mask = 0; + __entry->group = 0; + __entry->ino = 0; + __entry->gen = 0; + __entry->offset = 0; + __entry->length = 0; + __entry->lostcount = 0; + switch (__entry->domain) { + case XFS_HEALTHMON_MOUNT: + switch (__entry->type) { + case XFS_HEALTHMON_SHUTDOWN: + __entry->mask = event->flags; + break; + case XFS_HEALTHMON_LOST: + __entry->lostcount = event->lostcount; + break; + } + break; + case XFS_HEALTHMON_FS: + __entry->mask = event->fsmask; + break; + case XFS_HEALTHMON_AG: + case XFS_HEALTHMON_RTGROUP: + __entry->mask = event->grpmask; + __entry->group = event->group; + break; + case XFS_HEALTHMON_INODE: + __entry->mask = event->imask; + __entry->ino = event->ino; + __entry->gen = event->gen; + break; + case XFS_HEALTHMON_DATADEV: + case XFS_HEALTHMON_LOGDEV: + case XFS_HEALTHMON_RTDEV: + __entry->offset = event->daddr; + __entry->length = event->bbcount; + break; + case XFS_HEALTHMON_FILERANGE: + __entry->ino = event->fino; + __entry->gen = event->fgen; + __entry->offset = event->fpos; + __entry->length = event->flen; + break; + } + ), + TP_printk("dev %d:%d type %s domain %s mask 0x%x ino 0x%llx gen 0x%x offset 0x%llx len 0x%llx group 0x%x lost %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->type, XFS_HEALTHMON_TYPE_STRINGS), + __print_symbolic(__entry->domain, XFS_HEALTHMON_DOMAIN_STRINGS), + __entry->mask, + __entry->ino, + __entry->gen, + __entry->offset, + __entry->length, + __entry->group, + __entry->lostcount) +); +#define DEFINE_HEALTHMONEVENT_EVENT(name) \ +DEFINE_EVENT(xfs_healthmon_event_class, name, \ + TP_PROTO(const struct xfs_healthmon *hm, \ + const struct xfs_healthmon_event *event), \ + TP_ARGS(hm, event)) +DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_insert); +DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_push); +DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_pop); +DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_format); 
+DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_format_overflow); +DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_drop); +DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_merge); + +TRACE_EVENT(xfs_healthmon_report_fs, + TP_PROTO(const struct xfs_healthmon *hm, + unsigned int old_mask, unsigned int new_mask, + const struct xfs_healthmon_event *event), + TP_ARGS(hm, old_mask, new_mask, event), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(unsigned int, domain) + __field(unsigned int, old_mask) + __field(unsigned int, new_mask) + __field(unsigned int, fsmask) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->type = event->type; + __entry->domain = event->domain; + __entry->old_mask = old_mask; + __entry->new_mask = new_mask; + __entry->fsmask = event->fsmask; + ), + TP_printk("dev %d:%d type %s domain %s oldmask 0x%x newmask 0x%x fsmask 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->type, XFS_HEALTHMON_TYPE_STRINGS), + __print_symbolic(__entry->domain, XFS_HEALTHMON_DOMAIN_STRINGS), + __entry->old_mask, + __entry->new_mask, + __entry->fsmask) +); + +TRACE_EVENT(xfs_healthmon_report_group, + TP_PROTO(const struct xfs_healthmon *hm, + unsigned int old_mask, unsigned int new_mask, + const struct xfs_healthmon_event *event), + TP_ARGS(hm, old_mask, new_mask, event), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(unsigned int, domain) + __field(unsigned int, old_mask) + __field(unsigned int, new_mask) + __field(unsigned int, grpmask) + __field(unsigned int, group) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->type = event->type; + __entry->domain = event->domain; + __entry->old_mask = old_mask; + __entry->new_mask = new_mask; + __entry->grpmask = event->grpmask; + __entry->group = event->group; + ), + TP_printk("dev %d:%d type %s domain %s oldmask 0x%x newmask 0x%x grpmask 0x%x group 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + 
__print_symbolic(__entry->type, XFS_HEALTHMON_TYPE_STRINGS), + __print_symbolic(__entry->domain, XFS_HEALTHMON_DOMAIN_STRINGS), + __entry->old_mask, + __entry->new_mask, + __entry->grpmask, + __entry->group) +); + +TRACE_EVENT(xfs_healthmon_report_inode, + TP_PROTO(const struct xfs_healthmon *hm, + unsigned int old_mask, unsigned int new_mask, + const struct xfs_healthmon_event *event), + TP_ARGS(hm, old_mask, new_mask, event), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(unsigned int, domain) + __field(unsigned int, old_mask) + __field(unsigned int, new_mask) + __field(unsigned int, imask) + __field(unsigned long long, ino) + __field(unsigned int, gen) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->type = event->type; + __entry->domain = event->domain; + __entry->old_mask = old_mask; + __entry->new_mask = new_mask; + __entry->imask = event->imask; + __entry->ino = event->ino; + __entry->gen = event->gen; + ), + TP_printk("dev %d:%d type %s domain %s oldmask 0x%x newmask 0x%x imask 0x%x ino 0x%llx gen 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->type, XFS_HEALTHMON_TYPE_STRINGS), + __print_symbolic(__entry->domain, XFS_HEALTHMON_DOMAIN_STRINGS), + __entry->old_mask, + __entry->new_mask, + __entry->imask, + __entry->ino, + __entry->gen) +); + +TRACE_EVENT(xfs_healthmon_report_shutdown, + TP_PROTO(const struct xfs_healthmon *hm, uint32_t shutdown_flags), + TP_ARGS(hm, shutdown_flags), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(uint32_t, shutdown_flags) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->shutdown_flags = shutdown_flags; + ), + TP_printk("dev %d:%d shutdown_flags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_flags(__entry->shutdown_flags, "|", XFS_SHUTDOWN_STRINGS)) +); + +#define XFS_DEVICE_STRINGS \ + { XFS_DEV_DATA, "datadev" }, \ + { XFS_DEV_RT, "rtdev" }, \ + { XFS_DEV_LOG, "logdev" } + +TRACE_DEFINE_ENUM(XFS_DEV_DATA); 
+TRACE_DEFINE_ENUM(XFS_DEV_RT); +TRACE_DEFINE_ENUM(XFS_DEV_LOG); + +TRACE_EVENT(xfs_healthmon_report_media, + TP_PROTO(const struct xfs_healthmon *hm, enum xfs_device fdev, + const struct xfs_healthmon_event *event), + TP_ARGS(hm, fdev, event), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, error_dev) + __field(uint64_t, daddr) + __field(uint64_t, bbcount) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->error_dev = fdev; + __entry->daddr = event->daddr; + __entry->bbcount = event->bbcount; + ), + TP_printk("dev %d:%d %s daddr 0x%llx bbcount 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->error_dev, XFS_DEVICE_STRINGS), + __entry->daddr, + __entry->bbcount) +); + +#define FS_ERROR_STRINGS \ + { FSERR_BUFFERED_READ, "buffered_read" }, \ + { FSERR_BUFFERED_WRITE, "buffered_write" }, \ + { FSERR_DIRECTIO_READ, "directio_read" }, \ + { FSERR_DIRECTIO_WRITE, "directio_write" }, \ + { FSERR_DATA_LOST, "data_lost" }, \ + { FSERR_METADATA, "metadata" } + +TRACE_DEFINE_ENUM(FSERR_BUFFERED_READ); +TRACE_DEFINE_ENUM(FSERR_BUFFERED_WRITE); +TRACE_DEFINE_ENUM(FSERR_DIRECTIO_READ); +TRACE_DEFINE_ENUM(FSERR_DIRECTIO_WRITE); +TRACE_DEFINE_ENUM(FSERR_DATA_LOST); +TRACE_DEFINE_ENUM(FSERR_METADATA); + +TRACE_EVENT(xfs_healthmon_report_file_ioerror, + TP_PROTO(const struct xfs_healthmon *hm, + const struct fserror_event *p), + TP_ARGS(hm, p), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(unsigned long long, ino) + __field(unsigned int, gen) + __field(long long, pos) + __field(unsigned long long, len) + __field(int, error) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->type = p->type; + __entry->ino = XFS_I(p->inode)->i_ino; + __entry->gen = p->inode->i_generation; + __entry->pos = p->pos; + __entry->len = p->len; + __entry->error = p->error; + ), + TP_printk("dev %d:%d ino 0x%llx gen 0x%x op %s pos 0x%llx bytecount 0x%llx error %d", + MAJOR(__entry->dev), 
MINOR(__entry->dev), + __entry->ino, + __entry->gen, + __print_symbolic(__entry->type, FS_ERROR_STRINGS), + __entry->pos, + __entry->len, + __entry->error) +); + +TRACE_EVENT(xfs_verify_media, + TP_PROTO(const struct xfs_mount *mp, const struct xfs_verify_media *me, + dev_t fdev, xfs_daddr_t daddr, uint64_t bbcount, + const struct folio *folio), + TP_ARGS(mp, me, fdev, daddr, bbcount, folio), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, fdev) + __field(xfs_daddr_t, start_daddr) + __field(xfs_daddr_t, end_daddr) + __field(unsigned int, flags) + __field(xfs_daddr_t, daddr) + __field(uint64_t, bbcount) + __field(unsigned int, bufsize) + ), + TP_fast_assign( + __entry->dev = mp->m_ddev_targp->bt_dev; + __entry->fdev = fdev; + __entry->start_daddr = me->me_start_daddr; + __entry->end_daddr = me->me_end_daddr; + __entry->flags = me->me_flags; + __entry->daddr = daddr; + __entry->bbcount = bbcount; + __entry->bufsize = folio_size(folio); + ), + TP_printk("dev %d:%d fdev %d:%d start_daddr 0x%llx end_daddr 0x%llx flags 0x%x daddr 0x%llx bbcount 0x%llx bufsize 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->fdev), MINOR(__entry->fdev), + __entry->start_daddr, + __entry->end_daddr, + __entry->flags, + __entry->daddr, + __entry->bbcount, + __entry->bufsize) +); + +TRACE_EVENT(xfs_verify_media_end, + TP_PROTO(const struct xfs_mount *mp, const struct xfs_verify_media *me, + dev_t fdev), + TP_ARGS(mp, me, fdev), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, fdev) + __field(xfs_daddr_t, start_daddr) + __field(xfs_daddr_t, end_daddr) + __field(int, ioerror) + ), + TP_fast_assign( + __entry->dev = mp->m_ddev_targp->bt_dev; + __entry->fdev = fdev; + __entry->start_daddr = me->me_start_daddr; + __entry->end_daddr = me->me_end_daddr; + __entry->ioerror = me->me_ioerror; + ), + TP_printk("dev %d:%d fdev %d:%d start_daddr 0x%llx end_daddr 0x%llx ioerror %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->fdev), 
MINOR(__entry->fdev), + __entry->start_daddr, + __entry->end_daddr, + __entry->ioerror) +); + +TRACE_EVENT(xfs_verify_media_error, + TP_PROTO(const struct xfs_mount *mp, const struct xfs_verify_media *me, + dev_t fdev, xfs_daddr_t daddr, uint64_t bbcount, + blk_status_t status), + TP_ARGS(mp, me, fdev, daddr, bbcount, status), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, fdev) + __field(xfs_daddr_t, start_daddr) + __field(xfs_daddr_t, end_daddr) + __field(unsigned int, flags) + __field(xfs_daddr_t, daddr) + __field(uint64_t, bbcount) + __field(int, error) + ), + TP_fast_assign( + __entry->dev = mp->m_ddev_targp->bt_dev; + __entry->fdev = fdev; + __entry->start_daddr = me->me_start_daddr; + __entry->end_daddr = me->me_end_daddr; + __entry->flags = me->me_flags; + __entry->daddr = daddr; + __entry->bbcount = bbcount; + __entry->error = blk_status_to_errno(status); + ), + TP_printk("dev %d:%d fdev %d:%d start_daddr 0x%llx end_daddr 0x%llx flags 0x%x daddr 0x%llx bbcount 0x%llx error %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->fdev), MINOR(__entry->fdev), + __entry->start_daddr, + __entry->end_daddr, + __entry->flags, + __entry->daddr, + __entry->bbcount, + __entry->error) +); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 474f5a04ec63..bcc470f56e46 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -4,7 +4,7 @@ * Copyright (C) 2010 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -124,8 +124,6 @@ xfs_trans_dup( ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; tp->t_rtx_res = tp->t_rtx_res_used; - xfs_trans_switch_context(tp, ntp); - /* move deferred ops over to the new tp */ xfs_defer_move(ntp, tp); @@ -1043,6 +1041,12 @@ xfs_trans_roll( * locked be logged in the prior and the next transactions. 
*/ tp = *tpp; + /* + * __xfs_trans_commit cleared the NOFS flag by calling into + * xfs_trans_free. Set it again here before doing memory + * allocations. + */ + xfs_trans_set_context(tp); error = xfs_log_regrant(tp->t_mountp, tp->t_ticket); if (error) return error; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 7fb860f645a3..eb83c5dac032 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -9,6 +9,7 @@ /* kernel only transaction subsystem defines */ struct xlog; +struct xlog_format_buf; struct xfs_buf; struct xfs_buftarg; struct xfs_efd_log_item; @@ -70,7 +71,8 @@ struct xfs_log_item { struct xfs_item_ops { unsigned flags; void (*iop_size)(struct xfs_log_item *, int *, int *); - void (*iop_format)(struct xfs_log_item *, struct xfs_log_vec *); + void (*iop_format)(struct xfs_log_item *lip, + struct xlog_format_buf *lfb); void (*iop_pin)(struct xfs_log_item *); void (*iop_unpin)(struct xfs_log_item *, int remove); uint64_t (*iop_sort)(struct xfs_log_item *lip); @@ -278,13 +280,4 @@ xfs_trans_clear_context( memalloc_nofs_restore(tp->t_pflags); } -static inline void -xfs_trans_switch_context( - struct xfs_trans *old_tp, - struct xfs_trans *new_tp) -{ - new_tp->t_pflags = old_tp->t_pflags; - old_tp->t_pflags = 0; -} - #endif /* __XFS_TRANS_H__ */ diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 38983c6777df..363d7f88c2c6 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -4,7 +4,7 @@ * Copyright (c) 2008 Dave Chinner * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 53af546c0b23..95db73a37e57 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index c842ce06acd6..eaf9de6e07fd 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c new file mode 100644 index 000000000000..069cd371619d --- /dev/null +++ b/fs/xfs/xfs_verify_media.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2026 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs_platform.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_bit.h" +#include "xfs_btree.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_trans.h" +#include "xfs_alloc.h" +#include "xfs_ag.h" +#include "xfs_rmap.h" +#include "xfs_rmap_btree.h" +#include "xfs_rtgroup.h" +#include "xfs_rtrmap_btree.h" +#include "xfs_health.h" +#include "xfs_healthmon.h" +#include "xfs_trace.h" +#include "xfs_verify_media.h" + +#include <linux/fserror.h> + +struct xfs_group_data_lost { + xfs_agblock_t startblock; + xfs_extlen_t blockcount; +}; + +/* Report lost file data from rmap records */ +static int +xfs_verify_report_data_lost( + struct xfs_btree_cur *cur, + const struct xfs_rmap_irec *rec, + void *data) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_inode *ip; + struct xfs_group_data_lost *lost = data; + xfs_fileoff_t fileoff = rec->rm_offset; + xfs_extlen_t blocks = rec->rm_blockcount; + const bool is_attr = + (rec->rm_flags & XFS_RMAP_ATTR_FORK); + const xfs_agblock_t lost_end = + lost->startblock + lost->blockcount; + const 
xfs_agblock_t rmap_end = + rec->rm_startblock + rec->rm_blockcount; + int error = 0; + + if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner)) + return 0; + + error = xfs_iget(mp, cur->bc_tp, rec->rm_owner, 0, 0, &ip); + if (error) + return 0; + + if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) { + xfs_bmap_mark_sick(ip, is_attr ? XFS_ATTR_FORK : XFS_DATA_FORK); + goto out_rele; + } + + if (is_attr) { + xfs_inode_mark_sick(ip, XFS_SICK_INO_XATTR); + goto out_rele; + } + + if (lost->startblock > rec->rm_startblock) { + fileoff += lost->startblock - rec->rm_startblock; + blocks -= lost->startblock - rec->rm_startblock; + } + if (rmap_end > lost_end) + blocks -= rmap_end - lost_end; + + fserror_report_data_lost(VFS_I(ip), XFS_FSB_TO_B(mp, fileoff), + XFS_FSB_TO_B(mp, blocks), GFP_NOFS); + +out_rele: + xfs_irele(ip); + return 0; +} + +/* Walk reverse mappings to look for all file data loss */ +static int +xfs_verify_report_losses( + struct xfs_mount *mp, + enum xfs_group_type type, + xfs_daddr_t daddr, + u64 bblen) +{ + struct xfs_group *xg = NULL; + struct xfs_trans *tp; + xfs_fsblock_t start_bno, end_bno; + uint32_t start_gno, end_gno; + int error; + + if (type == XG_TYPE_RTG) { + start_bno = xfs_daddr_to_rtb(mp, daddr); + end_bno = xfs_daddr_to_rtb(mp, daddr + bblen - 1); + } else { + start_bno = XFS_DADDR_TO_FSB(mp, daddr); + end_bno = XFS_DADDR_TO_FSB(mp, daddr + bblen - 1); + } + + tp = xfs_trans_alloc_empty(mp); + start_gno = xfs_fsb_to_gno(mp, start_bno, type); + end_gno = xfs_fsb_to_gno(mp, end_bno, type); + while ((xg = xfs_group_next_range(mp, xg, start_gno, end_gno, type))) { + struct xfs_buf *agf_bp = NULL; + struct xfs_rtgroup *rtg = NULL; + struct xfs_btree_cur *cur; + struct xfs_rmap_irec ri_low = { }; + struct xfs_rmap_irec ri_high; + struct xfs_group_data_lost lost; + + if (type == XG_TYPE_AG) { + struct xfs_perag *pag = to_perag(xg); + + error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp); + if (error) { + xfs_perag_put(pag); + break; + } + + cur = 
xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag); + } else { + rtg = to_rtg(xg); + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); + cur = xfs_rtrmapbt_init_cursor(tp, rtg); + } + + /* + * Set the rmap range from ri_low to ri_high, which represents + * a [start, end] where we looking for the files or metadata. + */ + memset(&ri_high, 0xFF, sizeof(ri_high)); + if (xg->xg_gno == start_gno) + ri_low.rm_startblock = + xfs_fsb_to_gbno(mp, start_bno, type); + if (xg->xg_gno == end_gno) + ri_high.rm_startblock = + xfs_fsb_to_gbno(mp, end_bno, type); + + lost.startblock = ri_low.rm_startblock; + lost.blockcount = min(xg->xg_block_count, + ri_high.rm_startblock + 1) - + ri_low.rm_startblock; + + error = xfs_rmap_query_range(cur, &ri_low, &ri_high, + xfs_verify_report_data_lost, &lost); + xfs_btree_del_cursor(cur, error); + if (agf_bp) + xfs_trans_brelse(tp, agf_bp); + if (rtg) + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); + if (error) { + xfs_group_put(xg); + break; + } + } + + xfs_trans_cancel(tp); + return 0; +} + +/* + * Compute the desired verify IO size. + * + * To minimize command overhead, we'd like to create bios that are 1MB, though + * we allow the user to ask for a smaller size. + */ +static unsigned int +xfs_verify_iosize( + const struct xfs_verify_media *me, + struct xfs_buftarg *btp, + uint64_t bbcount) +{ + unsigned int iosize = + min_not_zero(SZ_1M, me->me_max_io_size); + + BUILD_BUG_ON(BBSHIFT != SECTOR_SHIFT); + ASSERT(BBTOB(bbcount) >= bdev_logical_block_size(btp->bt_bdev)); + + return clamp(iosize, bdev_logical_block_size(btp->bt_bdev), + BBTOB(bbcount)); +} + +/* Allocate as much memory as we can get for verification buffer. 
*/ +static struct folio * +xfs_verify_alloc_folio( + const unsigned int iosize) +{ + unsigned int order = get_order(iosize); + + while (order > 0) { + struct folio *folio = + folio_alloc(GFP_KERNEL | __GFP_NORETRY, order); + + if (folio) + return folio; + order--; + } + + return folio_alloc(GFP_KERNEL, 0); +} + +/* Report any kind of problem verifying media */ +static void +xfs_verify_media_error( + struct xfs_mount *mp, + struct xfs_verify_media *me, + struct xfs_buftarg *btp, + xfs_daddr_t daddr, + unsigned int bio_bbcount, + blk_status_t bio_status) +{ + trace_xfs_verify_media_error(mp, me, btp->bt_bdev->bd_dev, daddr, + bio_bbcount, bio_status); + + /* + * Pass any error, I/O or otherwise, up to the caller if we didn't + * successfully verify any bytes at all. + */ + if (me->me_start_daddr == daddr) + me->me_ioerror = -blk_status_to_errno(bio_status); + + /* + * PI validation failures, medium errors, or general IO errors are + * treated as indicators of data loss. Everything else are (hopefully) + * transient errors and are not reported to healthmon or fsnotify. + */ + switch (bio_status) { + case BLK_STS_PROTECTION: + case BLK_STS_IOERR: + case BLK_STS_MEDIUM: + break; + default: + return; + } + + if (!(me->me_flags & XFS_VERIFY_MEDIA_REPORT)) + return; + + xfs_healthmon_report_media(mp, me->me_dev, daddr, bio_bbcount); + + if (!xfs_has_rmapbt(mp)) + return; + + switch (me->me_dev) { + case XFS_DEV_DATA: + xfs_verify_report_losses(mp, XG_TYPE_AG, daddr, bio_bbcount); + break; + case XFS_DEV_RT: + xfs_verify_report_losses(mp, XG_TYPE_RTG, daddr, bio_bbcount); + break; + } +} + +/* Verify the media of an xfs device by submitting read requests to the disk. 
*/ +static int +xfs_verify_media( + struct xfs_mount *mp, + struct xfs_verify_media *me) +{ + struct xfs_buftarg *btp = NULL; + struct bio *bio; + struct folio *folio; + xfs_daddr_t daddr; + uint64_t bbcount; + int error = 0; + + me->me_ioerror = 0; + + switch (me->me_dev) { + case XFS_DEV_DATA: + btp = mp->m_ddev_targp; + break; + case XFS_DEV_LOG: + if (mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev) + btp = mp->m_logdev_targp; + break; + case XFS_DEV_RT: + btp = mp->m_rtdev_targp; + break; + } + if (!btp) + return -ENODEV; + + /* + * If the caller told us to verify beyond the end of the disk, tell the + * user exactly where that was. + */ + if (me->me_end_daddr > btp->bt_nr_sectors) + me->me_end_daddr = btp->bt_nr_sectors; + + /* start and end have to be aligned to the lba size */ + if (!IS_ALIGNED(BBTOB(me->me_start_daddr | me->me_end_daddr), + bdev_logical_block_size(btp->bt_bdev))) + return -EINVAL; + + /* + * end_daddr is the exclusive end of the range, so if start_daddr + * reaches there (or beyond), there's no work to be done. + */ + if (me->me_start_daddr >= me->me_end_daddr) + return 0; + + /* + * There are three ranges involved here: + * + * - [me->me_start_daddr, me->me_end_daddr) is the range that the + * user wants to verify. end_daddr can be beyond the end of the + * disk; we'll constrain it to the end if necessary. + * + * - [daddr, me->me_end_daddr) is the range that we have not yet + * verified. We update daddr after each successful read. + * me->me_start_daddr is set to daddr before returning. + * + * - [daddr, daddr + bio_bbcount) is the range that we're currently + * verifying. 
+ */ + daddr = me->me_start_daddr; + bbcount = min_t(sector_t, me->me_end_daddr, btp->bt_nr_sectors) - + me->me_start_daddr; + + folio = xfs_verify_alloc_folio(xfs_verify_iosize(me, btp, bbcount)); + if (!folio) + return -ENOMEM; + + trace_xfs_verify_media(mp, me, btp->bt_bdev->bd_dev, daddr, bbcount, + folio); + + bio = bio_alloc(btp->bt_bdev, 1, REQ_OP_READ, GFP_KERNEL); + if (!bio) { + error = -ENOMEM; + goto out_folio; + } + + while (bbcount > 0) { + unsigned int bio_bbcount; + blk_status_t bio_status; + + bio_reset(bio, btp->bt_bdev, REQ_OP_READ); + bio->bi_iter.bi_sector = daddr; + bio_add_folio_nofail(bio, folio, + min(bbcount << SECTOR_SHIFT, folio_size(folio)), + 0); + + /* + * Save the length of the bio before we submit it, because we + * need the original daddr and length for reporting IO errors + * if the bio fails. + */ + bio_bbcount = bio->bi_iter.bi_size >> SECTOR_SHIFT; + submit_bio_wait(bio); + bio_status = bio->bi_status; + if (bio_status != BLK_STS_OK) { + xfs_verify_media_error(mp, me, btp, daddr, bio_bbcount, + bio_status); + error = 0; + break; + } + + daddr += bio_bbcount; + bbcount -= bio_bbcount; + + if (bbcount == 0) + break; + + if (me->me_rest_us) { + ktime_t expires; + + expires = ktime_add_ns(ktime_get(), + me->me_rest_us * 1000); + set_current_state(TASK_KILLABLE); + schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); + } + + if (fatal_signal_pending(current)) { + error = -EINTR; + break; + } + + cond_resched(); + } + + bio_put(bio); +out_folio: + folio_put(folio); + + if (error) + return error; + + /* + * Advance start_daddr to the end of what we verified if there wasn't + * an operational error. 
+ */ + me->me_start_daddr = daddr; + trace_xfs_verify_media_end(mp, me, btp->bt_bdev->bd_dev); + return 0; +} + +int +xfs_ioc_verify_media( + struct file *file, + struct xfs_verify_media __user *arg) +{ + struct xfs_verify_media me; + struct xfs_inode *ip = XFS_I(file_inode(file)); + struct xfs_mount *mp = ip->i_mount; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&me, arg, sizeof(me))) + return -EFAULT; + + if (me.me_pad) + return -EINVAL; + if (me.me_flags & ~XFS_VERIFY_MEDIA_FLAGS) + return -EINVAL; + + switch (me.me_dev) { + case XFS_DEV_DATA: + case XFS_DEV_LOG: + case XFS_DEV_RT: + break; + default: + return -EINVAL; + } + + error = xfs_verify_media(mp, &me); + if (error) + return error; + + if (copy_to_user(arg, &me, sizeof(me))) + return -EFAULT; + + return 0; +} diff --git a/fs/xfs/xfs_verify_media.h b/fs/xfs/xfs_verify_media.h new file mode 100644 index 000000000000..dc6eee9c8863 --- /dev/null +++ b/fs/xfs/xfs_verify_media.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2026 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_VERIFY_MEDIA_H__ +#define __XFS_VERIFY_MEDIA_H__ + +struct xfs_verify_media; +int xfs_ioc_verify_media(struct file *file, + struct xfs_verify_media __user *arg); + +#endif /* __XFS_VERIFY_MEDIA_H__ */ diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index ac5cecec9aa1..a735f16d9cd8 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -4,7 +4,7 @@ * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index bbcf21704ea0..b60952565737 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2025 Christoph Hellwig. 
* Copyright (c) 2024-2025, Western Digital Corporation or its affiliates. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" @@ -408,31 +408,6 @@ xfs_zone_free_blocks( return 0; } -static struct xfs_group * -xfs_find_free_zone( - struct xfs_mount *mp, - unsigned long start, - unsigned long end) -{ - struct xfs_zone_info *zi = mp->m_zone_info; - XA_STATE (xas, &mp->m_groups[XG_TYPE_RTG].xa, start); - struct xfs_group *xg; - - xas_lock(&xas); - xas_for_each_marked(&xas, xg, end, XFS_RTG_FREE) - if (atomic_inc_not_zero(&xg->xg_active_ref)) - goto found; - xas_unlock(&xas); - return NULL; - -found: - xas_clear_mark(&xas, XFS_RTG_FREE); - atomic_dec(&zi->zi_nr_free_zones); - zi->zi_free_zone_cursor = xg->xg_gno; - xas_unlock(&xas); - return xg; -} - static struct xfs_open_zone * xfs_init_open_zone( struct xfs_rtgroup *rtg, @@ -472,13 +447,25 @@ xfs_open_zone( bool is_gc) { struct xfs_zone_info *zi = mp->m_zone_info; + XA_STATE (xas, &mp->m_groups[XG_TYPE_RTG].xa, 0); struct xfs_group *xg; - xg = xfs_find_free_zone(mp, zi->zi_free_zone_cursor, ULONG_MAX); - if (!xg) - xg = xfs_find_free_zone(mp, 0, zi->zi_free_zone_cursor); - if (!xg) - return NULL; + /* + * Pick the free zone with lowest index. Zones in the beginning of the + * address space typically provides higher bandwidth than those at the + * end of the address space on HDDs. 
+ */ + xas_lock(&xas); + xas_for_each_marked(&xas, xg, ULONG_MAX, XFS_RTG_FREE) + if (atomic_inc_not_zero(&xg->xg_active_ref)) + goto found; + xas_unlock(&xas); + return NULL; + +found: + xas_clear_mark(&xas, XFS_RTG_FREE); + atomic_dec(&zi->zi_nr_free_zones); + xas_unlock(&xas); set_current_state(TASK_RUNNING); return xfs_init_open_zone(to_rtg(xg), 0, write_hint, is_gc); @@ -976,46 +963,106 @@ xfs_free_open_zones( } struct xfs_init_zones { - struct xfs_mount *mp; + uint32_t zone_size; + uint32_t zone_capacity; uint64_t available; uint64_t reclaimable; }; +/* + * For sequential write required zones, we restart writing at the hardware write + * pointer returned by xfs_validate_blk_zone(). + * + * For conventional zones or conventional devices we have to query the rmap to + * find the highest recorded block and set the write pointer to the block after + * that. In case of a power loss this misses blocks where the data I/O has + * completed but not recorded in the rmap yet, and it also rewrites blocks if + * the most recently written ones got deleted again before unmount, but this is + * the best we can do without hardware support. 
+ */ +static int +xfs_query_write_pointer( + struct xfs_init_zones *iz, + struct xfs_rtgroup *rtg, + xfs_rgblock_t *write_pointer) +{ + struct xfs_mount *mp = rtg_mount(rtg); + struct block_device *bdev = mp->m_rtdev_targp->bt_bdev; + sector_t start = xfs_gbno_to_daddr(&rtg->rtg_group, 0); + xfs_rgblock_t highest_rgbno; + struct blk_zone zone = {}; + int error; + + if (bdev_is_zoned(bdev)) { + error = blkdev_get_zone_info(bdev, start, &zone); + if (error) + return error; + if (zone.start != start) { + xfs_warn(mp, "mismatched zone start: 0x%llx/0x%llx.", + zone.start, start); + return -EFSCORRUPTED; + } + + if (!xfs_validate_blk_zone(mp, &zone, rtg_rgno(rtg), + iz->zone_size, iz->zone_capacity, + write_pointer)) + return -EFSCORRUPTED; + + /* + * Use the hardware write pointer returned by + * xfs_validate_blk_zone for sequential write required zones, + * else fall through to the rmap-based estimation below. + */ + if (zone.cond != BLK_ZONE_COND_NOT_WP) + return 0; + } + + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); + highest_rgbno = xfs_rtrmap_highest_rgbno(rtg); + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); + + if (highest_rgbno == NULLRGBLOCK) + *write_pointer = 0; + else + *write_pointer = highest_rgbno + 1; + return 0; +} + static int xfs_init_zone( struct xfs_init_zones *iz, struct xfs_rtgroup *rtg, - struct blk_zone *zone) + xfs_rgblock_t write_pointer) { struct xfs_mount *mp = rtg_mount(rtg); struct xfs_zone_info *zi = mp->m_zone_info; uint32_t used = rtg_rmap(rtg)->i_used_blocks; - xfs_rgblock_t write_pointer, highest_rgbno; int error; - if (zone && !xfs_zone_validate(zone, rtg, &write_pointer)) + if (write_pointer > rtg->rtg_extents) { + xfs_warn(mp, "zone %u has invalid write pointer (0x%x).", + rtg_rgno(rtg), write_pointer); return -EFSCORRUPTED; + } - /* - * For sequential write required zones we retrieved the hardware write - * pointer above. - * - * For conventional zones or conventional devices we don't have that - * luxury. 
Instead query the rmap to find the highest recorded block - * and set the write pointer to the block after that. In case of a - * power loss this misses blocks where the data I/O has completed but - * not recorded in the rmap yet, and it also rewrites blocks if the most - * recently written ones got deleted again before unmount, but this is - * the best we can do without hardware support. - */ - if (!zone || zone->cond == BLK_ZONE_COND_NOT_WP) { - xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); - highest_rgbno = xfs_rtrmap_highest_rgbno(rtg); - if (highest_rgbno == NULLRGBLOCK) - write_pointer = 0; - else - write_pointer = highest_rgbno + 1; - xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); + if (used > rtg->rtg_extents) { + xfs_warn(mp, +"zone %u has used counter (0x%x) larger than zone capacity (0x%llx).", + rtg_rgno(rtg), used, rtg->rtg_extents); + return -EFSCORRUPTED; + } + + if (used > write_pointer) { + xfs_warn(mp, +"zone %u has used counter (0x%x) larger than write pointer (0x%x).", + rtg_rgno(rtg), used, write_pointer); + return -EFSCORRUPTED; + } + + if (write_pointer == 0 && used != 0) { + xfs_warn(mp, "empty zone %u has non-zero used counter (0x%x).", + rtg_rgno(rtg), used); + return -EFSCORRUPTED; } /* @@ -1056,35 +1103,6 @@ xfs_init_zone( return 0; } -static int -xfs_get_zone_info_cb( - struct blk_zone *zone, - unsigned int idx, - void *data) -{ - struct xfs_init_zones *iz = data; - struct xfs_mount *mp = iz->mp; - xfs_fsblock_t zsbno = xfs_daddr_to_rtb(mp, zone->start); - xfs_rgnumber_t rgno; - struct xfs_rtgroup *rtg; - int error; - - if (xfs_rtb_to_rgbno(mp, zsbno) != 0) { - xfs_warn(mp, "mismatched zone start 0x%llx.", zsbno); - return -EFSCORRUPTED; - } - - rgno = xfs_rtb_to_rgno(mp, zsbno); - rtg = xfs_rtgroup_grab(mp, rgno); - if (!rtg) { - xfs_warn(mp, "realtime group not found for zone %u.", rgno); - return -EFSCORRUPTED; - } - error = xfs_init_zone(iz, rtg, zone); - xfs_rtgroup_rele(rtg); - return error; -} - /* * Calculate the max open zone limit 
based on the of number of backing zones * available. @@ -1219,13 +1237,13 @@ xfs_mount_zones( struct xfs_mount *mp) { struct xfs_init_zones iz = { - .mp = mp, + .zone_capacity = mp->m_groups[XG_TYPE_RTG].blocks, + .zone_size = xfs_rtgroup_raw_size(mp), }; - struct xfs_buftarg *bt = mp->m_rtdev_targp; - xfs_extlen_t zone_blocks = mp->m_groups[XG_TYPE_RTG].blocks; + struct xfs_rtgroup *rtg = NULL; int error; - if (!bt) { + if (!mp->m_rtdev_targp) { xfs_notice(mp, "RT device missing."); return -EINVAL; } @@ -1253,7 +1271,7 @@ xfs_mount_zones( return -ENOMEM; xfs_info(mp, "%u zones of %u blocks (%u max open zones)", - mp->m_sb.sb_rgcount, zone_blocks, mp->m_max_open_zones); + mp->m_sb.sb_rgcount, iz.zone_capacity, mp->m_max_open_zones); trace_xfs_zones_mount(mp); /* @@ -1277,24 +1295,18 @@ xfs_mount_zones( * or beneficial. */ mp->m_super->s_min_writeback_pages = - XFS_FSB_TO_B(mp, min(zone_blocks, XFS_MAX_BMBT_EXTLEN)) >> + XFS_FSB_TO_B(mp, min(iz.zone_capacity, XFS_MAX_BMBT_EXTLEN)) >> PAGE_SHIFT; - if (bdev_is_zoned(bt->bt_bdev)) { - error = blkdev_report_zones_cached(bt->bt_bdev, - XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart), - mp->m_sb.sb_rgcount, xfs_get_zone_info_cb, &iz); - if (error < 0) + while ((rtg = xfs_rtgroup_next(mp, rtg))) { + xfs_rgblock_t write_pointer; + + error = xfs_query_write_pointer(&iz, rtg, &write_pointer); + if (!error) + error = xfs_init_zone(&iz, rtg, write_pointer); + if (error) { + xfs_rtgroup_rele(rtg); goto out_free_zone_info; - } else { - struct xfs_rtgroup *rtg = NULL; - - while ((rtg = xfs_rtgroup_next(mp, rtg))) { - error = xfs_init_zone(&iz, rtg, NULL); - if (error) { - xfs_rtgroup_rele(rtg); - goto out_free_zone_info; - } } } diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 3c52cc1497d4..1f1f9fc973af 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2025 Christoph Hellwig. * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" @@ -16,6 +16,8 @@ #include "xfs_rmap.h" #include "xfs_rtbitmap.h" #include "xfs_rtrmap_btree.h" +#include "xfs_errortag.h" +#include "xfs_error.h" #include "xfs_zone_alloc.h" #include "xfs_zone_priv.h" #include "xfs_zones.h" @@ -50,23 +52,11 @@ */ /* - * Size of each GC scratch pad. This is also the upper bound for each - * GC I/O, which helps to keep latency down. + * Size of each GC scratch allocation, and the number of buffers. */ -#define XFS_GC_CHUNK_SIZE SZ_1M - -/* - * Scratchpad data to read GCed data into. - * - * The offset member tracks where the next allocation starts, and freed tracks - * the amount of space that is not used anymore. - */ -#define XFS_ZONE_GC_NR_SCRATCH 2 -struct xfs_zone_scratch { - struct folio *folio; - unsigned int offset; - unsigned int freed; -}; +#define XFS_GC_BUF_SIZE SZ_1M +#define XFS_GC_NR_BUFS 2 +static_assert(XFS_GC_NR_BUFS < BIO_MAX_VECS); /* * Chunk that is read and written for each GC operation. @@ -141,10 +131,17 @@ struct xfs_zone_gc_data { struct bio_set bio_set; /* - * Scratchpad used, and index to indicated which one is used. + * Scratchpad to buffer GC data, organized as a ring buffer over + * discontiguous folios. scratch_head is where the buffer is filled, + * scratch_tail tracks the buffer space freed, and scratch_available + * counts the space available in the ring buffer between the head and + * the tail. */ - struct xfs_zone_scratch scratch[XFS_ZONE_GC_NR_SCRATCH]; - unsigned int scratch_idx; + struct folio *scratch_folios[XFS_GC_NR_BUFS]; + unsigned int scratch_size; + unsigned int scratch_available; + unsigned int scratch_head; + unsigned int scratch_tail; /* * List of bios currently being read, written and reset. @@ -210,20 +207,17 @@ xfs_zone_gc_data_alloc( if (!data->iter.recs) goto out_free_data; - /* - * We actually only need a single bio_vec. 
It would be nice to have - * a flag that only allocates the inline bvecs and not the separate - * bvec pool. - */ if (bioset_init(&data->bio_set, 16, offsetof(struct xfs_gc_bio, bio), BIOSET_NEED_BVECS)) goto out_free_recs; - for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++) { - data->scratch[i].folio = - folio_alloc(GFP_KERNEL, get_order(XFS_GC_CHUNK_SIZE)); - if (!data->scratch[i].folio) + for (i = 0; i < XFS_GC_NR_BUFS; i++) { + data->scratch_folios[i] = + folio_alloc(GFP_KERNEL, get_order(XFS_GC_BUF_SIZE)); + if (!data->scratch_folios[i]) goto out_free_scratch; } + data->scratch_size = XFS_GC_BUF_SIZE * XFS_GC_NR_BUFS; + data->scratch_available = data->scratch_size; INIT_LIST_HEAD(&data->reading); INIT_LIST_HEAD(&data->writing); INIT_LIST_HEAD(&data->resetting); @@ -232,7 +226,7 @@ xfs_zone_gc_data_alloc( out_free_scratch: while (--i >= 0) - folio_put(data->scratch[i].folio); + folio_put(data->scratch_folios[i]); bioset_exit(&data->bio_set); out_free_recs: kfree(data->iter.recs); @@ -247,8 +241,8 @@ xfs_zone_gc_data_free( { int i; - for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++) - folio_put(data->scratch[i].folio); + for (i = 0; i < XFS_GC_NR_BUFS; i++) + folio_put(data->scratch_folios[i]); bioset_exit(&data->bio_set); kfree(data->iter.recs); kfree(data); @@ -586,26 +580,6 @@ xfs_zone_gc_ensure_target( return oz; } -static unsigned int -xfs_zone_gc_scratch_available( - struct xfs_zone_gc_data *data) -{ - return XFS_GC_CHUNK_SIZE - data->scratch[data->scratch_idx].offset; -} - -static bool -xfs_zone_gc_space_available( - struct xfs_zone_gc_data *data) -{ - struct xfs_open_zone *oz; - - oz = xfs_zone_gc_ensure_target(data->mp); - if (!oz) - return false; - return oz->oz_allocated < rtg_blocks(oz->oz_rtg) && - xfs_zone_gc_scratch_available(data); -} - static void xfs_zone_gc_end_io( struct bio *bio) @@ -632,8 +606,7 @@ xfs_zone_gc_alloc_blocks( if (!oz) return NULL; - *count_fsb = min(*count_fsb, - XFS_B_TO_FSB(mp, xfs_zone_gc_scratch_available(data))); + *count_fsb = 
min(*count_fsb, XFS_B_TO_FSB(mp, data->scratch_available)); /* * Directly allocate GC blocks from the reserved pool. @@ -664,6 +637,28 @@ xfs_zone_gc_alloc_blocks( return oz; } +static void +xfs_zone_gc_add_data( + struct xfs_gc_bio *chunk) +{ + struct xfs_zone_gc_data *data = chunk->data; + unsigned int len = chunk->len; + unsigned int off = data->scratch_head; + + do { + unsigned int this_off = off % XFS_GC_BUF_SIZE; + unsigned int this_len = min(len, XFS_GC_BUF_SIZE - this_off); + + bio_add_folio_nofail(&chunk->bio, + data->scratch_folios[off / XFS_GC_BUF_SIZE], + this_len, this_off); + len -= this_len; + off += this_len; + if (off == data->scratch_size) + off = 0; + } while (len); +} + static bool xfs_zone_gc_start_chunk( struct xfs_zone_gc_data *data) @@ -677,6 +672,7 @@ xfs_zone_gc_start_chunk( struct xfs_inode *ip; struct bio *bio; xfs_daddr_t daddr; + unsigned int len; bool is_seq; if (xfs_is_shutdown(mp)) @@ -691,17 +687,19 @@ xfs_zone_gc_start_chunk( return false; } - bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ, GFP_NOFS, &data->bio_set); + len = XFS_FSB_TO_B(mp, irec.rm_blockcount); + bio = bio_alloc_bioset(bdev, + min(howmany(len, XFS_GC_BUF_SIZE) + 1, XFS_GC_NR_BUFS), + REQ_OP_READ, GFP_NOFS, &data->bio_set); chunk = container_of(bio, struct xfs_gc_bio, bio); chunk->ip = ip; chunk->offset = XFS_FSB_TO_B(mp, irec.rm_offset); - chunk->len = XFS_FSB_TO_B(mp, irec.rm_blockcount); + chunk->len = len; chunk->old_startblock = xfs_rgbno_to_rtb(iter->victim_rtg, irec.rm_startblock); chunk->new_daddr = daddr; chunk->is_seq = is_seq; - chunk->scratch = &data->scratch[data->scratch_idx]; chunk->data = data; chunk->oz = oz; chunk->victim_rtg = iter->victim_rtg; @@ -710,13 +708,12 @@ xfs_zone_gc_start_chunk( bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock); bio->bi_end_io = xfs_zone_gc_end_io; - bio_add_folio_nofail(bio, chunk->scratch->folio, chunk->len, - chunk->scratch->offset); - chunk->scratch->offset += chunk->len; - if 
(chunk->scratch->offset == XFS_GC_CHUNK_SIZE) { - data->scratch_idx = - (data->scratch_idx + 1) % XFS_ZONE_GC_NR_SCRATCH; - } + xfs_zone_gc_add_data(chunk); + data->scratch_head = (data->scratch_head + len) % data->scratch_size; + data->scratch_available -= len; + + XFS_STATS_INC(mp, xs_gc_read_calls); + WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW); list_add_tail(&chunk->entry, &data->reading); xfs_zone_gc_iter_advance(iter, irec.rm_blockcount); @@ -811,8 +808,6 @@ xfs_zone_gc_write_chunk( { struct xfs_zone_gc_data *data = chunk->data; struct xfs_mount *mp = chunk->ip->i_mount; - phys_addr_t bvec_paddr = - bvec_phys(bio_first_bvec_all(&chunk->bio)); struct xfs_gc_bio *split_chunk; if (chunk->bio.bi_status) @@ -822,13 +817,13 @@ xfs_zone_gc_write_chunk( return; } + XFS_STATS_INC(mp, xs_gc_write_calls); + XFS_STATS_ADD(mp, xs_gc_bytes, chunk->len); + WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW); list_move_tail(&chunk->entry, &data->writing); - bio_reset(&chunk->bio, mp->m_rtdev_targp->bt_bdev, REQ_OP_WRITE); - bio_add_folio_nofail(&chunk->bio, chunk->scratch->folio, chunk->len, - offset_in_folio(chunk->scratch->folio, bvec_paddr)); - + bio_reuse(&chunk->bio, REQ_OP_WRITE); while ((split_chunk = xfs_zone_gc_split_write(data, chunk))) xfs_zone_gc_submit_write(data, split_chunk); xfs_zone_gc_submit_write(data, chunk); @@ -839,6 +834,7 @@ xfs_zone_gc_finish_chunk( struct xfs_gc_bio *chunk) { uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; + struct xfs_zone_gc_data *data = chunk->data; struct xfs_inode *ip = chunk->ip; struct xfs_mount *mp = ip->i_mount; int error; @@ -850,11 +846,9 @@ xfs_zone_gc_finish_chunk( return; } - chunk->scratch->freed += chunk->len; - if (chunk->scratch->freed == chunk->scratch->offset) { - chunk->scratch->offset = 0; - chunk->scratch->freed = 0; - } + data->scratch_tail = + (data->scratch_tail + chunk->len) % data->scratch_size; + data->scratch_available += chunk->len; /* * Cycle through the iolock and wait for direct I/O and layouts to @@ -906,39 
+900,64 @@ out: bio_put(&chunk->bio); } -static bool -xfs_zone_gc_prepare_reset( - struct bio *bio, - struct xfs_rtgroup *rtg) +static void +xfs_submit_zone_reset_bio( + struct xfs_rtgroup *rtg, + struct bio *bio) { + struct xfs_mount *mp = rtg_mount(rtg); + trace_xfs_zone_reset(rtg); ASSERT(rtg_rmap(rtg)->i_used_blocks == 0); + + if (XFS_TEST_ERROR(mp, XFS_ERRTAG_ZONE_RESET)) { + bio_io_error(bio); + return; + } + + XFS_STATS_INC(mp, xs_gc_zone_reset_calls); + bio->bi_iter.bi_sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0); if (!bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) { - if (!bdev_max_discard_sectors(bio->bi_bdev)) - return false; - bio->bi_opf = REQ_OP_DISCARD | REQ_SYNC; - bio->bi_iter.bi_size = - XFS_FSB_TO_B(rtg_mount(rtg), rtg_blocks(rtg)); + /* + * Also use the bio to drive the state machine when neither + * zone reset nor discard is supported to keep things simple. + */ + if (!bdev_max_discard_sectors(bio->bi_bdev)) { + bio_endio(bio); + return; + } + bio->bi_opf &= ~REQ_OP_ZONE_RESET; + bio->bi_opf |= REQ_OP_DISCARD; + bio->bi_iter.bi_size = XFS_FSB_TO_B(mp, rtg_blocks(rtg)); } - return true; + submit_bio(bio); +} + +static void xfs_bio_wait_endio(struct bio *bio) +{ + complete(bio->bi_private); } int xfs_zone_gc_reset_sync( struct xfs_rtgroup *rtg) { - int error = 0; + DECLARE_COMPLETION_ONSTACK(done); struct bio bio; + int error; bio_init(&bio, rtg_mount(rtg)->m_rtdev_targp->bt_bdev, NULL, 0, - REQ_OP_ZONE_RESET); - if (xfs_zone_gc_prepare_reset(&bio, rtg)) - error = submit_bio_wait(&bio); - bio_uninit(&bio); + REQ_OP_ZONE_RESET | REQ_SYNC); + bio.bi_private = &done; + bio.bi_end_io = xfs_bio_wait_endio; + xfs_submit_zone_reset_bio(rtg, &bio); + wait_for_completion_io(&done); + error = blk_status_to_errno(bio.bi_status); + bio_uninit(&bio); return error; } @@ -973,15 +992,7 @@ xfs_zone_gc_reset_zones( chunk->data = data; WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW); list_add_tail(&chunk->entry, &data->resetting); - - /* - * Also use the bio 
to drive the state machine when neither - * zone reset nor discard is supported to keep things simple. - */ - if (xfs_zone_gc_prepare_reset(bio, rtg)) - submit_bio(bio); - else - bio_endio(bio); + xfs_submit_zone_reset_bio(rtg, bio); } while (next); } @@ -989,9 +1000,15 @@ static bool xfs_zone_gc_should_start_new_work( struct xfs_zone_gc_data *data) { + struct xfs_open_zone *oz; + if (xfs_is_shutdown(data->mp)) return false; - if (!xfs_zone_gc_space_available(data)) + if (!data->scratch_available) + return false; + + oz = xfs_zone_gc_ensure_target(data->mp); + if (!oz || oz->oz_allocated == rtg_blocks(oz->oz_rtg)) return false; if (!data->iter.victim_rtg) { diff --git a/fs/xfs/xfs_zone_info.c b/fs/xfs/xfs_zone_info.c index 07e30c596975..53eabbc3334c 100644 --- a/fs/xfs/xfs_zone_info.c +++ b/fs/xfs/xfs_zone_info.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2025 Christoph Hellwig. * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" diff --git a/fs/xfs/xfs_zone_priv.h b/fs/xfs/xfs_zone_priv.h index ce7f0e2f4598..8fbf9a52964e 100644 --- a/fs/xfs/xfs_zone_priv.h +++ b/fs/xfs/xfs_zone_priv.h @@ -72,7 +72,6 @@ struct xfs_zone_info { /* * Free zone search cursor and number of free zones: */ - unsigned long zi_free_zone_cursor; atomic_t zi_nr_free_zones; /* diff --git a/fs/xfs/xfs_zone_space_resv.c b/fs/xfs/xfs_zone_space_resv.c index fc1a4d1ce10c..5c6e6ef627e4 100644 --- a/fs/xfs/xfs_zone_space_resv.c +++ b/fs/xfs/xfs_zone_space_resv.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2025 Christoph Hellwig. * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" diff --git a/include/linux/bio.h b/include/linux/bio.h index c75a9b3672aa..6156f2d66d4a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -414,6 +414,7 @@ static inline void bio_init_inline(struct bio *bio, struct block_device *bdev, } extern void bio_uninit(struct bio *); void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); +void bio_reuse(struct bio *bio, blk_opf_t opf); void bio_chain(struct bio *, struct bio *); int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len, |
