summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/xfs.rst8
-rw-r--r--block/bio.c34
-rw-r--r--fs/xfs/Makefile2
-rw-r--r--fs/xfs/libxfs/xfs_ag.c2
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c2
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c10
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_attr.c191
-rw-r--r--fs/xfs/libxfs/xfs_attr.h6
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c197
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h1
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c2
-rw-r--r--fs/xfs/libxfs/xfs_bit.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_btree_mem.c2
-rw-r--r--fs/xfs/libxfs/xfs_btree_staging.c2
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h2
-rw-r--r--fs/xfs/libxfs/xfs_defer.c4
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c2
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c2
-rw-r--r--fs/xfs/libxfs/xfs_errortag.h8
-rw-r--r--fs/xfs/libxfs/xfs_exchmaps.c2
-rw-r--r--fs/xfs/libxfs/xfs_fs.h189
-rw-r--r--fs/xfs/libxfs/xfs_group.c2
-rw-r--r--fs/xfs/libxfs/xfs_health.h5
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c2
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_iext_tree.c2
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c2
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c2
-rw-r--r--fs/xfs/libxfs/xfs_inode_util.c2
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h7
-rw-r--r--fs/xfs/libxfs/xfs_log_rlimit.c2
-rw-r--r--fs/xfs/libxfs/xfs_metadir.c2
-rw-r--r--fs/xfs/libxfs/xfs_metafile.c2
-rw-r--r--fs/xfs/libxfs/xfs_parent.c16
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c2
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c2
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c2
-rw-r--r--fs/xfs/libxfs/xfs_rtgroup.c2
-rw-r--r--fs/xfs/libxfs/xfs_rtgroup.h15
-rw-r--r--fs/xfs/libxfs/xfs_rtrefcount_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_rtrmap_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_sb.c2
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c2
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c2
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.c2
-rw-r--r--fs/xfs/libxfs/xfs_types.c2
-rw-r--r--fs/xfs/libxfs/xfs_zones.c151
-rw-r--r--fs/xfs/libxfs/xfs_zones.h6
-rw-r--r--fs/xfs/scrub/agb_bitmap.c2
-rw-r--r--fs/xfs/scrub/agheader.c2
-rw-r--r--fs/xfs/scrub/agheader_repair.c23
-rw-r--r--fs/xfs/scrub/alloc.c2
-rw-r--r--fs/xfs/scrub/alloc_repair.c22
-rw-r--r--fs/xfs/scrub/attr.c61
-rw-r--r--fs/xfs/scrub/attr_repair.c28
-rw-r--r--fs/xfs/scrub/bitmap.c2
-rw-r--r--fs/xfs/scrub/bmap.c2
-rw-r--r--fs/xfs/scrub/bmap_repair.c8
-rw-r--r--fs/xfs/scrub/btree.c11
-rw-r--r--fs/xfs/scrub/common.c9
-rw-r--r--fs/xfs/scrub/common.h25
-rw-r--r--fs/xfs/scrub/cow_repair.c2
-rw-r--r--fs/xfs/scrub/dabtree.c4
-rw-r--r--fs/xfs/scrub/dir.c15
-rw-r--r--fs/xfs/scrub/dir_repair.c21
-rw-r--r--fs/xfs/scrub/dirtree.c21
-rw-r--r--fs/xfs/scrub/dirtree_repair.c2
-rw-r--r--fs/xfs/scrub/dqiterate.c2
-rw-r--r--fs/xfs/scrub/findparent.c2
-rw-r--r--fs/xfs/scrub/fscounters.c2
-rw-r--r--fs/xfs/scrub/fscounters_repair.c2
-rw-r--r--fs/xfs/scrub/health.c2
-rw-r--r--fs/xfs/scrub/ialloc.c2
-rw-r--r--fs/xfs/scrub/ialloc_repair.c27
-rw-r--r--fs/xfs/scrub/inode.c2
-rw-r--r--fs/xfs/scrub/inode_repair.c2
-rw-r--r--fs/xfs/scrub/iscan.c2
-rw-r--r--fs/xfs/scrub/listxattr.c2
-rw-r--r--fs/xfs/scrub/metapath.c2
-rw-r--r--fs/xfs/scrub/newbt.c2
-rw-r--r--fs/xfs/scrub/nlinks.c11
-rw-r--r--fs/xfs/scrub/nlinks_repair.c2
-rw-r--r--fs/xfs/scrub/orphanage.c2
-rw-r--r--fs/xfs/scrub/parent.c13
-rw-r--r--fs/xfs/scrub/parent_repair.c25
-rw-r--r--fs/xfs/scrub/quota.c2
-rw-r--r--fs/xfs/scrub/quota_repair.c2
-rw-r--r--fs/xfs/scrub/quotacheck.c15
-rw-r--r--fs/xfs/scrub/quotacheck_repair.c2
-rw-r--r--fs/xfs/scrub/rcbag.c2
-rw-r--r--fs/xfs/scrub/rcbag_btree.c2
-rw-r--r--fs/xfs/scrub/readdir.c2
-rw-r--r--fs/xfs/scrub/reap.c2
-rw-r--r--fs/xfs/scrub/refcount.c2
-rw-r--r--fs/xfs/scrub/refcount_repair.c15
-rw-r--r--fs/xfs/scrub/repair.c5
-rw-r--r--fs/xfs/scrub/rgsuper.c2
-rw-r--r--fs/xfs/scrub/rmap.c2
-rw-r--r--fs/xfs/scrub/rmap_repair.c7
-rw-r--r--fs/xfs/scrub/rtbitmap.c2
-rw-r--r--fs/xfs/scrub/rtbitmap_repair.c8
-rw-r--r--fs/xfs/scrub/rtrefcount.c2
-rw-r--r--fs/xfs/scrub/rtrefcount_repair.c17
-rw-r--r--fs/xfs/scrub/rtrmap.c2
-rw-r--r--fs/xfs/scrub/rtrmap_repair.c7
-rw-r--r--fs/xfs/scrub/rtsummary.c9
-rw-r--r--fs/xfs/scrub/rtsummary_repair.c2
-rw-r--r--fs/xfs/scrub/scrub.c4
-rw-r--r--fs/xfs/scrub/stats.c2
-rw-r--r--fs/xfs/scrub/symlink.c2
-rw-r--r--fs/xfs/scrub/symlink_repair.c2
-rw-r--r--fs/xfs/scrub/tempfile.c2
-rw-r--r--fs/xfs/scrub/trace.c2
-rw-r--r--fs/xfs/scrub/xfarray.c2
-rw-r--r--fs/xfs/scrub/xfblob.c2
-rw-r--r--fs/xfs/scrub/xfile.c2
-rw-r--r--fs/xfs/xfs.h28
-rw-r--r--fs/xfs/xfs_acl.c2
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_attr_inactive.c2
-rw-r--r--fs/xfs/xfs_attr_item.c29
-rw-r--r--fs/xfs/xfs_attr_list.c2
-rw-r--r--fs/xfs/xfs_bio_io.c2
-rw-r--r--fs/xfs/xfs_bmap_item.c12
-rw-r--r--fs/xfs/xfs_bmap_util.c2
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_buf_item.c21
-rw-r--r--fs/xfs/xfs_buf_item_recover.c2
-rw-r--r--fs/xfs/xfs_buf_mem.c2
-rw-r--r--fs/xfs/xfs_dahash_test.c2
-rw-r--r--fs/xfs/xfs_dir2_readdir.c2
-rw-r--r--fs/xfs/xfs_discard.c2
-rw-r--r--fs/xfs/xfs_dquot.c2
-rw-r--r--fs/xfs/xfs_dquot_item.c11
-rw-r--r--fs/xfs/xfs_dquot_item_recover.c2
-rw-r--r--fs/xfs/xfs_drain.c2
-rw-r--r--fs/xfs/xfs_error.c144
-rw-r--r--fs/xfs/xfs_error.h23
-rw-r--r--fs/xfs/xfs_exchmaps_item.c13
-rw-r--r--fs/xfs/xfs_exchrange.c2
-rw-r--r--fs/xfs/xfs_export.c2
-rw-r--r--fs/xfs/xfs_extent_busy.c2
-rw-r--r--fs/xfs/xfs_extfree_item.c12
-rw-r--r--fs/xfs/xfs_file.c2
-rw-r--r--fs/xfs/xfs_filestream.c2
-rw-r--r--fs/xfs/xfs_fsmap.c2
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_globals.c2
-rw-r--r--fs/xfs/xfs_handle.c2
-rw-r--r--fs/xfs/xfs_health.c126
-rw-r--r--fs/xfs/xfs_healthmon.c1255
-rw-r--r--fs/xfs/xfs_healthmon.h184
-rw-r--r--fs/xfs/xfs_hooks.c2
-rw-r--r--fs/xfs/xfs_icache.c2
-rw-r--r--fs/xfs/xfs_icreate_item.c8
-rw-r--r--fs/xfs/xfs_inode.c2
-rw-r--r--fs/xfs/xfs_inode_item.c51
-rw-r--r--fs/xfs/xfs_inode_item_recover.c2
-rw-r--r--fs/xfs/xfs_ioctl.c9
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_iomap.c2
-rw-r--r--fs/xfs/xfs_iops.c2
-rw-r--r--fs/xfs/xfs_itable.c2
-rw-r--r--fs/xfs/xfs_iunlink_item.c2
-rw-r--r--fs/xfs/xfs_iwalk.c2
-rw-r--r--fs/xfs/xfs_log.c294
-rw-r--r--fs/xfs/xfs_log.h65
-rw-r--r--fs/xfs/xfs_log_cil.c113
-rw-r--r--fs/xfs/xfs_log_priv.h20
-rw-r--r--fs/xfs/xfs_log_recover.c29
-rw-r--r--fs/xfs/xfs_message.c10
-rw-r--r--fs/xfs/xfs_message.h2
-rw-r--r--fs/xfs/xfs_mount.c4
-rw-r--r--fs/xfs/xfs_mount.h4
-rw-r--r--fs/xfs/xfs_mru_cache.c2
-rw-r--r--fs/xfs/xfs_notify_failure.c19
-rw-r--r--fs/xfs/xfs_platform.h (renamed from fs/xfs/xfs_linux.h)46
-rw-r--r--fs/xfs/xfs_pnfs.c2
-rw-r--r--fs/xfs/xfs_pwork.c2
-rw-r--r--fs/xfs/xfs_qm.c2
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c2
-rw-r--r--fs/xfs/xfs_quotaops.c2
-rw-r--r--fs/xfs/xfs_refcount_item.c12
-rw-r--r--fs/xfs/xfs_reflink.c2
-rw-r--r--fs/xfs/xfs_rmap_item.c12
-rw-r--r--fs/xfs/xfs_rtalloc.c2
-rw-r--r--fs/xfs/xfs_stats.c14
-rw-r--r--fs/xfs/xfs_stats.h8
-rw-r--r--fs/xfs/xfs_super.c38
-rw-r--r--fs/xfs/xfs_symlink.c2
-rw-r--r--fs/xfs/xfs_sysctl.c2
-rw-r--r--fs/xfs/xfs_sysfs.c2
-rw-r--r--fs/xfs/xfs_trace.c7
-rw-r--r--fs/xfs/xfs_trace.h513
-rw-r--r--fs/xfs/xfs_trans.c10
-rw-r--r--fs/xfs/xfs_trans.h13
-rw-r--r--fs/xfs/xfs_trans_ail.c2
-rw-r--r--fs/xfs/xfs_trans_buf.c2
-rw-r--r--fs/xfs/xfs_trans_dquot.c2
-rw-r--r--fs/xfs/xfs_verify_media.c445
-rw-r--r--fs/xfs/xfs_verify_media.h13
-rw-r--r--fs/xfs/xfs_xattr.c2
-rw-r--r--fs/xfs/xfs_zone_alloc.c220
-rw-r--r--fs/xfs/xfs_zone_gc.c217
-rw-r--r--fs/xfs/xfs_zone_info.c2
-rw-r--r--fs/xfs/xfs_zone_priv.h1
-rw-r--r--fs/xfs/xfs_zone_space_resv.c2
-rw-r--r--include/linux/bio.h1
222 files changed, 4255 insertions, 1342 deletions
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst
index c85cd327af28..746ea60eed3f 100644
--- a/Documentation/admin-guide/xfs.rst
+++ b/Documentation/admin-guide/xfs.rst
@@ -215,6 +215,14 @@ When mounting an XFS filesystem, the following options are accepted.
inconsistent namespace presentation during or after a
failover event.
+ errortag=tagname
+ When specified, enables the error inject tag named "tagname" with the
+ default frequency. Can be specified multiple times to enable multiple
+ errortags. Specifying this option on remount will reset the error tag
+ to the default value if it was set to any other value before.
+ This option is only supported when CONFIG_XFS_DEBUG is enabled, and
+ will not be reflected in /proc/self/mounts.
+
Deprecation of V4 Format
========================
diff --git a/block/bio.c b/block/bio.c
index e726c0e280a8..40f690985bfb 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -311,6 +311,40 @@ void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf)
}
EXPORT_SYMBOL(bio_reset);
+/**
+ * bio_reuse - reuse a bio with the payload left intact
+ * @bio: bio to reuse
+ * @opf: operation and flags for the next I/O
+ *
+ * Allow reusing an existing bio for another operation with all set up
+ * fields including the payload, device and end_io handler left intact.
+ *
+ * Typically used when @bio is first used to read data which is then written
+ * to another location without modification. @bio must not be in-flight and
+ * must be owned by the caller. Can't be used for cloned bios.
+ *
+ * Note: Can't be used when @bio has integrity or blk-crypto contexts for now.
+ * Feel free to add that support when you need it, though.
+ */
+void bio_reuse(struct bio *bio, blk_opf_t opf)
+{
+ unsigned short vcnt = bio->bi_vcnt, i;
+ bio_end_io_t *end_io = bio->bi_end_io;
+ void *private = bio->bi_private;
+
+ WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+ WARN_ON_ONCE(bio_integrity(bio));
+ WARN_ON_ONCE(bio_has_crypt_ctx(bio));
+
+ bio_reset(bio, bio->bi_bdev, opf);
+ for (i = 0; i < vcnt; i++)
+ bio->bi_iter.bi_size += bio->bi_io_vec[i].bv_len;
+ bio->bi_vcnt = vcnt;
+ bio->bi_private = private;
+ bio->bi_end_io = end_io;
+}
+EXPORT_SYMBOL_GPL(bio_reuse);
+
static struct bio *__bio_chain_endio(struct bio *bio)
{
struct bio *parent = bio->bi_private;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 5bf501cf8271..9f7133e02576 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -88,6 +88,7 @@ xfs-y += xfs_aops.o \
xfs_globals.o \
xfs_handle.o \
xfs_health.o \
+ xfs_healthmon.o \
xfs_icache.o \
xfs_ioctl.o \
xfs_iomap.o \
@@ -105,6 +106,7 @@ xfs-y += xfs_aops.o \
xfs_symlink.o \
xfs_sysfs.o \
xfs_trans.o \
+ xfs_verify_media.o \
xfs_xattr.o
# low-level transaction/log code
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index e6ba914f6d06..586918ed1cbf 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -5,7 +5,7 @@
* All rights reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 8ac8230c3d3c..c4cdcc570d61 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -3,7 +3,7 @@
* Copyright (C) 2016 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index ad381c73abc4..d99602bcc16f 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
@@ -376,8 +376,8 @@ xfs_alloc_compute_diff(
xfs_agblock_t freeend; /* end of freespace extent */
xfs_agblock_t newbno1; /* return block number */
xfs_agblock_t newbno2; /* other new block number */
- xfs_extlen_t newlen1=0; /* length with newbno1 */
- xfs_extlen_t newlen2=0; /* length with newbno2 */
+ xfs_extlen_t newlen1 = 0; /* length with newbno1 */
+ xfs_extlen_t newlen2 = 0; /* length with newbno2 */
xfs_agblock_t wantend; /* end of target extent */
bool userdata = datatype & XFS_ALLOC_USERDATA;
@@ -577,8 +577,8 @@ xfs_alloc_fixup_trees(
int i; /* operation results */
xfs_agblock_t nfbno1; /* first new free startblock */
xfs_agblock_t nfbno2; /* second new free startblock */
- xfs_extlen_t nflen1=0; /* first new free length */
- xfs_extlen_t nflen2=0; /* second new free length */
+ xfs_extlen_t nflen1 = 0; /* first new free length */
+ xfs_extlen_t nflen2 = 0; /* second new free length */
struct xfs_mount *mp;
bool fixup_longest = false;
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index fa1f03c1331e..29f6ec1c3f6f 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 8c04acd30d48..93caa1dae501 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -50,7 +50,6 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
*/
STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
-STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp);
/*
* Internal routines when attribute list is more than one block.
@@ -351,16 +350,14 @@ xfs_attr_set_resv(
*/
STATIC int
xfs_attr_try_sf_addname(
- struct xfs_inode *dp,
struct xfs_da_args *args)
{
-
int error;
/*
* Build initial attribute list (if required).
*/
- if (dp->i_af.if_format == XFS_DINODE_FMT_EXTENTS)
+ if (args->dp->i_af.if_format == XFS_DINODE_FMT_EXTENTS)
xfs_attr_shortform_create(args);
error = xfs_attr_shortform_addname(args);
@@ -372,9 +369,9 @@ xfs_attr_try_sf_addname(
* NOTE: this is also the error path (EEXIST, etc).
*/
if (!error)
- xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
+ xfs_trans_ichgtime(args->trans, args->dp, XFS_ICHGTIME_CHG);
- if (xfs_has_wsync(dp->i_mount))
+ if (xfs_has_wsync(args->dp->i_mount))
xfs_trans_set_sync(args->trans);
return error;
@@ -385,10 +382,9 @@ xfs_attr_sf_addname(
struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
- struct xfs_inode *dp = args->dp;
int error = 0;
- error = xfs_attr_try_sf_addname(dp, args);
+ error = xfs_attr_try_sf_addname(args);
if (error != -ENOSPC) {
ASSERT(!error || error == -EEXIST);
attr->xattri_dela_state = XFS_DAS_DONE;
@@ -979,11 +975,12 @@ xfs_attr_lookup(
return error;
if (xfs_attr_is_leaf(dp)) {
- error = xfs_attr_leaf_hasname(args, &bp);
-
- if (bp)
- xfs_trans_brelse(args->trans, bp);
-
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner,
+ 0, &bp);
+ if (error)
+ return error;
+ error = xfs_attr3_leaf_lookup_int(bp, args);
+ xfs_trans_brelse(args->trans, bp);
return error;
}
@@ -1032,6 +1029,95 @@ trans_cancel:
}
/*
+ * Decide if it is theoretically possible to try to bypass the attr intent
+ * mechanism for better performance. Other constraints (e.g. available space
+ * in the existing structure) are not considered here.
+ */
+static inline bool
+xfs_attr_can_shortcut(
+ const struct xfs_inode *ip)
+{
+ return xfs_inode_has_attr_fork(ip) && xfs_attr_is_shortform(ip);
+}
+
+/* Try to set an attr in one transaction or fall back to attr intents. */
+int
+xfs_attr_setname(
+ struct xfs_da_args *args,
+ int rmt_blks)
+{
+ int error;
+
+ if (!rmt_blks && xfs_attr_can_shortcut(args->dp)) {
+ args->op_flags |= XFS_DA_OP_ADDNAME;
+
+ error = xfs_attr_try_sf_addname(args);
+ if (error != -ENOSPC)
+ return error;
+ }
+
+ xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET);
+ return 0;
+}
+
+/* Try to remove an attr in one transaction or fall back to attr intents. */
+int
+xfs_attr_removename(
+ struct xfs_da_args *args)
+{
+ if (xfs_attr_can_shortcut(args->dp))
+ return xfs_attr_sf_removename(args);
+
+ xfs_attr_defer_add(args, XFS_ATTR_DEFER_REMOVE);
+ return 0;
+}
+
+/* Try to replace an attr in one transaction or fall back to attr intents. */
+int
+xfs_attr_replacename(
+ struct xfs_da_args *args,
+ int rmt_blks)
+{
+ int error;
+
+ if (rmt_blks || !xfs_attr_can_shortcut(args->dp)) {
+ xfs_attr_defer_add(args, XFS_ATTR_DEFER_REPLACE);
+ return 0;
+ }
+
+ error = xfs_attr_shortform_replace(args);
+ if (error != -ENOSPC)
+ return error;
+
+ args->op_flags |= XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE;
+
+ error = xfs_attr_sf_removename(args);
+ if (error)
+ return error;
+
+ if (args->attr_filter & XFS_ATTR_PARENT) {
+ /*
+ * Move the new name/value to the regular name/value slots and
+ * zero out the new name/value slots because we don't need to
+ * log them for a PPTR_SET operation.
+ */
+ xfs_attr_update_pptr_replace_args(args);
+ args->new_name = NULL;
+ args->new_namelen = 0;
+ args->new_value = NULL;
+ args->new_valuelen = 0;
+ }
+ args->op_flags &= ~XFS_DA_OP_REPLACE;
+
+ error = xfs_attr_try_sf_addname(args);
+ if (error != -ENOSPC)
+ return error;
+
+ xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET);
+ return 0;
+}
+
+/*
* Make a change to the xattr structure.
*
* The caller must have initialized @args, attached dquots, and must not hold
@@ -1111,14 +1197,19 @@ xfs_attr_set(
case -EEXIST:
if (op == XFS_ATTRUPDATE_REMOVE) {
/* if no value, we are performing a remove operation */
- xfs_attr_defer_add(args, XFS_ATTR_DEFER_REMOVE);
+ error = xfs_attr_removename(args);
+ if (error)
+ goto out_trans_cancel;
break;
}
/* Pure create fails if the attr already exists */
if (op == XFS_ATTRUPDATE_CREATE)
goto out_trans_cancel;
- xfs_attr_defer_add(args, XFS_ATTR_DEFER_REPLACE);
+
+ error = xfs_attr_replacename(args, rmt_blks);
+ if (error)
+ goto out_trans_cancel;
break;
case -ENOATTR:
/* Can't remove what isn't there. */
@@ -1128,7 +1219,10 @@ xfs_attr_set(
/* Pure replace fails if no existing attr to replace. */
if (op == XFS_ATTRUPDATE_REPLACE)
goto out_trans_cancel;
- xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET);
+
+ error = xfs_attr_setname(args, rmt_blks);
+ if (error)
+ goto out_trans_cancel;
break;
default:
goto out_trans_cancel;
@@ -1223,27 +1317,6 @@ xfs_attr_shortform_addname(
*========================================================================*/
/*
- * Return EEXIST if attr is found, or ENOATTR if not
- */
-STATIC int
-xfs_attr_leaf_hasname(
- struct xfs_da_args *args,
- struct xfs_buf **bp)
-{
- int error = 0;
-
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, bp);
- if (error)
- return error;
-
- error = xfs_attr3_leaf_lookup_int(*bp, args);
- if (error != -ENOATTR && error != -EEXIST)
- xfs_trans_brelse(args->trans, *bp);
-
- return error;
-}
-
-/*
* Remove a name from the leaf attribute list structure
*
* This leaf block cannot have a "remote" value, we only call this routine
@@ -1253,25 +1326,22 @@ STATIC int
xfs_attr_leaf_removename(
struct xfs_da_args *args)
{
- struct xfs_inode *dp;
- struct xfs_buf *bp;
+ struct xfs_inode *dp = args->dp;
int error, forkoff;
+ struct xfs_buf *bp;
trace_xfs_attr_leaf_removename(args);
- /*
- * Remove the attribute.
- */
- dp = args->dp;
-
- error = xfs_attr_leaf_hasname(args, &bp);
- if (error == -ENOATTR) {
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp);
+ if (error)
+ return error;
+ error = xfs_attr3_leaf_lookup_int(bp, args);
+ if (error != -EEXIST) {
xfs_trans_brelse(args->trans, bp);
- if (args->op_flags & XFS_DA_OP_RECOVERY)
+ if (error == -ENOATTR && (args->op_flags & XFS_DA_OP_RECOVERY))
return 0;
return error;
- } else if (error != -EEXIST)
- return error;
+ }
xfs_attr3_leaf_remove(bp, args);
@@ -1295,23 +1365,20 @@ xfs_attr_leaf_removename(
* Returns 0 on successful retrieval, otherwise an error.
*/
STATIC int
-xfs_attr_leaf_get(xfs_da_args_t *args)
+xfs_attr_leaf_get(
+ struct xfs_da_args *args)
{
- struct xfs_buf *bp;
- int error;
+ struct xfs_buf *bp;
+ int error;
trace_xfs_attr_leaf_get(args);
- error = xfs_attr_leaf_hasname(args, &bp);
-
- if (error == -ENOATTR) {
- xfs_trans_brelse(args->trans, bp);
- return error;
- } else if (error != -EEXIST)
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp);
+ if (error)
return error;
-
-
- error = xfs_attr3_leaf_getvalue(bp, args);
+ error = xfs_attr3_leaf_lookup_int(bp, args);
+ if (error == -EEXIST)
+ error = xfs_attr3_leaf_getvalue(bp, args);
xfs_trans_brelse(args->trans, bp);
return error;
}
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 0e51d0723f9a..8244305949de 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -573,7 +573,7 @@ struct xfs_trans_res xfs_attr_set_resv(const struct xfs_da_args *args);
*/
static inline bool
xfs_attr_is_shortform(
- struct xfs_inode *ip)
+ const struct xfs_inode *ip)
{
return ip->i_af.if_format == XFS_DINODE_FMT_LOCAL ||
(ip->i_af.if_format == XFS_DINODE_FMT_EXTENTS &&
@@ -649,4 +649,8 @@ void xfs_attr_intent_destroy_cache(void);
int xfs_attr_sf_totsize(struct xfs_inode *dp);
int xfs_attr_add_fork(struct xfs_inode *ip, int size, int rsvd);
+int xfs_attr_setname(struct xfs_da_args *args, int rmt_blks);
+int xfs_attr_removename(struct xfs_da_args *args);
+int xfs_attr_replacename(struct xfs_da_args *args, int rmt_blks);
+
#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 91c1b30ebaab..47f48ae555c0 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -4,7 +4,7 @@
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -75,6 +75,59 @@ STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args,
int move_count);
STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
+/* Compute the byte offset of the end of the leaf entry array. */
+static inline int
+xfs_attr_leaf_entries_end(
+ unsigned int hdrcount,
+ const struct xfs_attr_leafblock *leaf)
+{
+ return hdrcount * sizeof(struct xfs_attr_leaf_entry) +
+ xfs_attr3_leaf_hdr_size(leaf);
+}
+
+static inline bool
+ichdr_freemaps_overlap(
+ const struct xfs_attr3_icleaf_hdr *ichdr,
+ unsigned int x,
+ unsigned int y)
+{
+ const unsigned int xend =
+ ichdr->freemap[x].base + ichdr->freemap[x].size;
+ const unsigned int yend =
+ ichdr->freemap[y].base + ichdr->freemap[y].size;
+
+ /* empty slots do not overlap */
+ if (!ichdr->freemap[x].size || !ichdr->freemap[y].size)
+ return false;
+
+ return ichdr->freemap[x].base < yend && xend > ichdr->freemap[y].base;
+}
+
+static inline xfs_failaddr_t
+xfs_attr_leaf_ichdr_freemaps_verify(
+ const struct xfs_attr3_icleaf_hdr *ichdr,
+ const struct xfs_attr_leafblock *leaf)
+{
+ unsigned int entries_end =
+ xfs_attr_leaf_entries_end(ichdr->count, leaf);
+ int i;
+
+ if (ichdr_freemaps_overlap(ichdr, 0, 1))
+ return __this_address;
+ if (ichdr_freemaps_overlap(ichdr, 0, 2))
+ return __this_address;
+ if (ichdr_freemaps_overlap(ichdr, 1, 2))
+ return __this_address;
+
+ for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
+ if (ichdr->freemap[i].size > 0 &&
+ ichdr->freemap[i].base < entries_end)
+ return __this_address;
+ }
+
+ return NULL;
+}
+
/*
* attr3 block 'firstused' conversion helpers.
*
@@ -218,6 +271,8 @@ xfs_attr3_leaf_hdr_to_disk(
hdr3->freemap[i].base = cpu_to_be16(from->freemap[i].base);
hdr3->freemap[i].size = cpu_to_be16(from->freemap[i].size);
}
+
+ ASSERT(xfs_attr_leaf_ichdr_freemaps_verify(from, to) == NULL);
return;
}
to->hdr.info.forw = cpu_to_be32(from->forw);
@@ -233,6 +288,8 @@ xfs_attr3_leaf_hdr_to_disk(
to->hdr.freemap[i].base = cpu_to_be16(from->freemap[i].base);
to->hdr.freemap[i].size = cpu_to_be16(from->freemap[i].size);
}
+
+ ASSERT(xfs_attr_leaf_ichdr_freemaps_verify(from, to) == NULL);
}
static xfs_failaddr_t
@@ -385,6 +442,10 @@ xfs_attr3_leaf_verify(
return __this_address;
}
+ fa = xfs_attr_leaf_ichdr_freemaps_verify(&ichdr, leaf);
+ if (fa)
+ return fa;
+
return NULL;
}
@@ -782,6 +843,44 @@ xfs_attr_sf_findname(
}
/*
+ * Replace a shortform xattr if it's the right length. Returns 0 on success,
+ * -ENOSPC if the length is wrong, or -ENOATTR if the attr was not found.
+ */
+int
+xfs_attr_shortform_replace(
+ struct xfs_da_args *args)
+{
+ struct xfs_attr_sf_entry *sfe;
+
+ ASSERT(args->dp->i_af.if_format == XFS_DINODE_FMT_LOCAL);
+
+ trace_xfs_attr_sf_replace(args);
+
+ sfe = xfs_attr_sf_findname(args);
+ if (!sfe)
+ return -ENOATTR;
+
+ if (args->attr_filter & XFS_ATTR_PARENT) {
+ if (sfe->namelen != args->new_namelen ||
+ sfe->valuelen != args->new_valuelen)
+ return -ENOSPC;
+
+ memcpy(sfe->nameval, args->new_name, sfe->namelen);
+ memcpy(&sfe->nameval[sfe->namelen], args->new_value,
+ sfe->valuelen);
+ } else {
+ if (sfe->valuelen != args->valuelen)
+ return -ENOSPC;
+ memcpy(&sfe->nameval[sfe->namelen], args->value,
+ sfe->valuelen);
+ }
+
+ xfs_trans_log_inode(args->trans, args->dp,
+ XFS_ILOG_CORE | XFS_ILOG_ADATA);
+ return 0;
+}
+
+/*
* Add a name/value pair to the shortform attribute list.
* Overflow from the inode has already been checked for.
*/
@@ -1409,8 +1508,7 @@ xfs_attr3_leaf_add(
* Search through freemap for first-fit on new name length.
* (may need to figure in size of entry struct too)
*/
- tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t)
- + xfs_attr3_leaf_hdr_size(leaf);
+ tablesize = xfs_attr_leaf_entries_end(ichdr.count + 1, leaf);
for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) {
if (tablesize > ichdr.firstused) {
sum += ichdr.freemap[i].size;
@@ -1476,6 +1574,7 @@ xfs_attr3_leaf_add_work(
struct xfs_attr_leaf_name_local *name_loc;
struct xfs_attr_leaf_name_remote *name_rmt;
struct xfs_mount *mp;
+ int old_end, new_end;
int tmp;
int i;
@@ -1568,17 +1667,48 @@ xfs_attr3_leaf_add_work(
if (be16_to_cpu(entry->nameidx) < ichdr->firstused)
ichdr->firstused = be16_to_cpu(entry->nameidx);
- ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t)
- + xfs_attr3_leaf_hdr_size(leaf));
- tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t)
- + xfs_attr3_leaf_hdr_size(leaf);
+ new_end = xfs_attr_leaf_entries_end(ichdr->count, leaf);
+ old_end = new_end - sizeof(struct xfs_attr_leaf_entry);
+
+ ASSERT(ichdr->firstused >= new_end);
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
- if (ichdr->freemap[i].base == tmp) {
- ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t);
+ int diff = 0;
+
+ if (ichdr->freemap[i].base == old_end) {
+ /*
+ * This freemap entry starts at the old end of the
+ * leaf entry array, so we need to adjust its base
+ * upward to accommodate the larger array.
+ */
+ diff = sizeof(struct xfs_attr_leaf_entry);
+ } else if (ichdr->freemap[i].size > 0 &&
+ ichdr->freemap[i].base < new_end) {
+ /*
+ * This freemap entry starts in the space claimed by
+ * the new leaf entry. Adjust its base upward to
+ * reflect that.
+ */
+ diff = new_end - ichdr->freemap[i].base;
+ }
+
+ if (diff) {
+ ichdr->freemap[i].base += diff;
ichdr->freemap[i].size -=
- min_t(uint16_t, ichdr->freemap[i].size,
- sizeof(xfs_attr_leaf_entry_t));
+ min_t(uint16_t, ichdr->freemap[i].size, diff);
+ }
+
+ /*
+ * Don't leave zero-length freemaps with nonzero base lying
+ * around, because we don't want the code in _remove that
+ * matches on base address to get confused and create
+ * overlapping freemaps. If we end up with no freemap entries
+ * then the next _add will compact the leaf block and
+ * regenerate the freemaps.
+ */
+ if (ichdr->freemap[i].size == 0 && ichdr->freemap[i].base > 0) {
+ ichdr->freemap[i].base = 0;
+ ichdr->holes = 1;
}
}
ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
@@ -1623,6 +1753,10 @@ xfs_attr3_leaf_compact(
ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src);
ichdr_dst->freemap[0].size = ichdr_dst->firstused -
ichdr_dst->freemap[0].base;
+ ichdr_dst->freemap[1].base = 0;
+ ichdr_dst->freemap[2].base = 0;
+ ichdr_dst->freemap[1].size = 0;
+ ichdr_dst->freemap[2].size = 0;
/* write the header back to initialise the underlying buffer */
xfs_attr3_leaf_hdr_to_disk(args->geo, leaf_dst, ichdr_dst);
@@ -1774,8 +1908,8 @@ xfs_attr3_leaf_rebalance(
/*
* leaf2 is the destination, compact it if it looks tight.
*/
- max = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1);
- max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t);
+ max = ichdr2.firstused -
+ xfs_attr_leaf_entries_end(ichdr2.count, leaf1);
if (space > max)
xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp);
@@ -1803,8 +1937,8 @@ xfs_attr3_leaf_rebalance(
/*
* leaf1 is the destination, compact it if it looks tight.
*/
- max = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1);
- max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t);
+ max = ichdr1.firstused -
+ xfs_attr_leaf_entries_end(ichdr1.count, leaf1);
if (space > max)
xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp);
@@ -2010,9 +2144,7 @@ xfs_attr3_leaf_toosmall(
blk = &state->path.blk[ state->path.active-1 ];
leaf = blk->bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr, leaf);
- bytes = xfs_attr3_leaf_hdr_size(leaf) +
- ichdr.count * sizeof(xfs_attr_leaf_entry_t) +
- ichdr.usedbytes;
+ bytes = xfs_attr_leaf_entries_end(ichdr.count, leaf) + ichdr.usedbytes;
if (bytes > (state->args->geo->blksize >> 1)) {
*action = 0; /* blk over 50%, don't try to join */
return 0;
@@ -2070,9 +2202,8 @@ xfs_attr3_leaf_toosmall(
bytes = state->args->geo->blksize -
(state->args->geo->blksize >> 2) -
ichdr.usedbytes - ichdr2.usedbytes -
- ((ichdr.count + ichdr2.count) *
- sizeof(xfs_attr_leaf_entry_t)) -
- xfs_attr3_leaf_hdr_size(leaf);
+ xfs_attr_leaf_entries_end(ichdr.count + ichdr2.count,
+ leaf);
xfs_trans_brelse(state->args->trans, bp);
if (bytes >= 0)
@@ -2134,8 +2265,7 @@ xfs_attr3_leaf_remove(
ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8);
ASSERT(args->index >= 0 && args->index < ichdr.count);
- ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) +
- xfs_attr3_leaf_hdr_size(leaf));
+ ASSERT(ichdr.firstused >= xfs_attr_leaf_entries_end(ichdr.count, leaf));
entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
@@ -2148,8 +2278,7 @@ xfs_attr3_leaf_remove(
* find smallest free region in case we need to replace it,
* adjust any map that borders the entry table,
*/
- tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t)
- + xfs_attr3_leaf_hdr_size(leaf);
+ tablesize = xfs_attr_leaf_entries_end(ichdr.count, leaf);
tmp = ichdr.freemap[0].size;
before = after = -1;
smallest = XFS_ATTR_LEAF_MAPSIZE - 1;
@@ -2256,8 +2385,7 @@ xfs_attr3_leaf_remove(
* Check if leaf is less than 50% full, caller may want to
* "join" the leaf with a sibling if so.
*/
- tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) +
- ichdr.count * sizeof(xfs_attr_leaf_entry_t);
+ tmp = ichdr.usedbytes + xfs_attr_leaf_entries_end(ichdr.count, leaf);
return tmp < args->geo->magicpct; /* leaf is < 37% full */
}
@@ -2580,11 +2708,11 @@ xfs_attr3_leaf_moveents(
ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC);
ASSERT(ichdr_s->magic == ichdr_d->magic);
ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8);
- ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s))
- + xfs_attr3_leaf_hdr_size(leaf_s));
+ ASSERT(ichdr_s->firstused >=
+ xfs_attr_leaf_entries_end(ichdr_s->count, leaf_s));
ASSERT(ichdr_d->count < args->geo->blksize / 8);
- ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d))
- + xfs_attr3_leaf_hdr_size(leaf_d));
+ ASSERT(ichdr_d->firstused >=
+ xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d));
ASSERT(start_s < ichdr_s->count);
ASSERT(start_d <= ichdr_d->count);
@@ -2644,8 +2772,7 @@ xfs_attr3_leaf_moveents(
ichdr_d->usedbytes += tmp;
ichdr_s->count -= 1;
ichdr_d->count += 1;
- tmp = ichdr_d->count * sizeof(xfs_attr_leaf_entry_t)
- + xfs_attr3_leaf_hdr_size(leaf_d);
+ tmp = xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d);
ASSERT(ichdr_d->firstused >= tmp);
#ifdef GROT
}
@@ -2681,8 +2808,8 @@ xfs_attr3_leaf_moveents(
/*
* Fill in the freemap information
*/
- ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_d);
- ichdr_d->freemap[0].base += ichdr_d->count * sizeof(xfs_attr_leaf_entry_t);
+ ichdr_d->freemap[0].base =
+ xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d);
ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base;
ichdr_d->freemap[1].base = 0;
ichdr_d->freemap[2].base = 0;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index 589f810eedc0..aca46da2bc50 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -46,6 +46,7 @@ struct xfs_attr3_icleaf_hdr {
* Internal routines when attribute fork size < XFS_LITINO(mp).
*/
void xfs_attr_shortform_create(struct xfs_da_args *args);
+int xfs_attr_shortform_replace(struct xfs_da_args *args);
void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
int xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index bff3dc226f81..e6c8dd1a997a 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -4,7 +4,7 @@
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c
index 40ce5f3094d1..f05a07c0f75d 100644
--- a/fs/xfs/libxfs/xfs_bit.c
+++ b/fs/xfs/libxfs/xfs_bit.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_log_format.h"
#include "xfs_bit.h"
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 53ef4b7e504d..7a4c8f1aa76c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 188feac04b60..1c7165df483a 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index dbe9df8c3300..7012f3570c8d 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_btree_mem.c b/fs/xfs/libxfs/xfs_btree_mem.c
index f2f7b4305413..37136a70e56d 100644
--- a/fs/xfs/libxfs/xfs_btree_mem.c
+++ b/fs/xfs/libxfs/xfs_btree_mem.c
@@ -3,7 +3,7 @@
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c
index 5ed84f9cc877..4300c058807b 100644
--- a/fs/xfs/libxfs/xfs_btree_staging.c
+++ b/fs/xfs/libxfs/xfs_btree_staging.c
@@ -3,7 +3,7 @@
* Copyright (C) 2020 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 90f7fc219fcc..766631f0562e 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -4,7 +4,7 @@
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 86de99e2f757..7d55307e619f 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -746,7 +746,7 @@ struct xfs_attr3_leafblock {
#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t))
static inline int
-xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp)
+xfs_attr3_leaf_hdr_size(const struct xfs_attr_leafblock *leafp)
{
if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
return sizeof(struct xfs_attr3_leaf_hdr);
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 5b377cbbb1f7..c39e40dcb0b0 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -3,7 +3,7 @@
* Copyright (C) 2016 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -565,7 +565,7 @@ xfs_defer_relog(
continue;
trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
- XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
+ XFS_STATS_INC((*tpp)->t_mountp, xs_defer_relog);
xfs_defer_relog_intent(*tpp, dfp);
}
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 82a338458a51..107c1a5b8a96 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 0f93ed1a4a74..6d70e6b429e7 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -4,7 +4,7 @@
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index a16b05c43e2e..80ba94f51e5c 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -4,7 +4,7 @@
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 71c2f22a3f6e..bc909543eb74 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -4,7 +4,7 @@
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index fe8d4fa13128..ed0b5287a44f 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -4,7 +4,7 @@
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 17a20384c8b7..1a67cdd6a707 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index dceef2abd4e2..ce767b40482f 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -4,7 +4,7 @@
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
index 57e47077c75a..6de207fed2d8 100644
--- a/fs/xfs/libxfs/xfs_errortag.h
+++ b/fs/xfs/libxfs/xfs_errortag.h
@@ -53,7 +53,7 @@
* Drop-writes support removed because write error handling cannot trash
* pre-existing delalloc extents in any useful way anymore. We retain the
* definition so that we can reject it as an invalid value in
- * xfs_errortag_valid().
+ * xfs_errortag_add().
*/
#define XFS_ERRTAG_DROP_WRITES 28
#define XFS_ERRTAG_LOG_BAD_CRC 29
@@ -74,7 +74,8 @@
#define XFS_ERRTAG_EXCHMAPS_FINISH_ONE 44
#define XFS_ERRTAG_METAFILE_RESV_CRITICAL 45
#define XFS_ERRTAG_FORCE_ZERO_RANGE 46
-#define XFS_ERRTAG_MAX 47
+#define XFS_ERRTAG_ZONE_RESET 47
+#define XFS_ERRTAG_MAX 48
/*
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -135,7 +136,8 @@ XFS_ERRTAG(WB_DELAY_MS, wb_delay_ms, 3000) \
XFS_ERRTAG(WRITE_DELAY_MS, write_delay_ms, 3000) \
XFS_ERRTAG(EXCHMAPS_FINISH_ONE, exchmaps_finish_one, 1) \
XFS_ERRTAG(METAFILE_RESV_CRITICAL, metafile_resv_crit, 4) \
-XFS_ERRTAG(FORCE_ZERO_RANGE, force_zero_range, 4)
+XFS_ERRTAG(FORCE_ZERO_RANGE, force_zero_range, 4) \
+XFS_ERRTAG(ZONE_RESET, zone_reset, 1)
#endif /* XFS_ERRTAG */
#endif /* __XFS_ERRORTAG_H_ */
diff --git a/fs/xfs/libxfs/xfs_exchmaps.c b/fs/xfs/libxfs/xfs_exchmaps.c
index 932ee4619e9e..5d28f4eac527 100644
--- a/fs/xfs/libxfs/xfs_exchmaps.c
+++ b/fs/xfs/libxfs/xfs_exchmaps.c
@@ -3,7 +3,7 @@
* Copyright (c) 2020-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 12463ba766da..d165de607d17 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -1003,6 +1003,191 @@ struct xfs_rtgroup_geometry {
#define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */
#define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4) /* reference counts */
+/* Health monitor event domains */
+
+/* affects the whole fs */
+#define XFS_HEALTH_MONITOR_DOMAIN_MOUNT (0)
+
+/* metadata health events */
+#define XFS_HEALTH_MONITOR_DOMAIN_FS (1)
+#define XFS_HEALTH_MONITOR_DOMAIN_AG (2)
+#define XFS_HEALTH_MONITOR_DOMAIN_INODE (3)
+#define XFS_HEALTH_MONITOR_DOMAIN_RTGROUP (4)
+
+/* disk events */
+#define XFS_HEALTH_MONITOR_DOMAIN_DATADEV (5)
+#define XFS_HEALTH_MONITOR_DOMAIN_RTDEV (6)
+#define XFS_HEALTH_MONITOR_DOMAIN_LOGDEV (7)
+
+/* file range events */
+#define XFS_HEALTH_MONITOR_DOMAIN_FILERANGE (8)
+
+/* Health monitor event types */
+
+/* status of the monitor itself */
+#define XFS_HEALTH_MONITOR_TYPE_RUNNING (0)
+#define XFS_HEALTH_MONITOR_TYPE_LOST (1)
+
+/* filesystem was unmounted */
+#define XFS_HEALTH_MONITOR_TYPE_UNMOUNT (2)
+
+/* metadata health events */
+#define XFS_HEALTH_MONITOR_TYPE_SICK (3)
+#define XFS_HEALTH_MONITOR_TYPE_CORRUPT (4)
+#define XFS_HEALTH_MONITOR_TYPE_HEALTHY (5)
+
+/* filesystem shutdown */
+#define XFS_HEALTH_MONITOR_TYPE_SHUTDOWN (6)
+
+/* media errors */
+#define XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR (7)
+
+/* pagecache I/O to a file range failed */
+#define XFS_HEALTH_MONITOR_TYPE_BUFREAD (8)
+#define XFS_HEALTH_MONITOR_TYPE_BUFWRITE (9)
+
+/* direct I/O to a file range failed */
+#define XFS_HEALTH_MONITOR_TYPE_DIOREAD (10)
+#define XFS_HEALTH_MONITOR_TYPE_DIOWRITE (11)
+
+/* out of band media error reported for a file range */
+#define XFS_HEALTH_MONITOR_TYPE_DATALOST (12)
+
+/* lost events */
+struct xfs_health_monitor_lost {
+ __u64 count;
+};
+
+/* fs/rt metadata */
+struct xfs_health_monitor_fs {
+ /* XFS_FSOP_GEOM_SICK_* flags */
+ __u32 mask;
+};
+
+/* ag/rtgroup metadata */
+struct xfs_health_monitor_group {
+ /* XFS_{AG,RTGROUP}_GEOM_SICK_* flags */
+ __u32 mask;
+ __u32 gno;
+};
+
+/* inode metadata */
+struct xfs_health_monitor_inode {
+ /* XFS_BS_SICK_* flags */
+ __u32 mask;
+ __u32 gen;
+ __u64 ino;
+};
+
+/* shutdown reasons */
+#define XFS_HEALTH_SHUTDOWN_META_IO_ERROR (1u << 0)
+#define XFS_HEALTH_SHUTDOWN_LOG_IO_ERROR (1u << 1)
+#define XFS_HEALTH_SHUTDOWN_FORCE_UMOUNT (1u << 2)
+#define XFS_HEALTH_SHUTDOWN_CORRUPT_INCORE (1u << 3)
+#define XFS_HEALTH_SHUTDOWN_CORRUPT_ONDISK (1u << 4)
+#define XFS_HEALTH_SHUTDOWN_DEVICE_REMOVED (1u << 5)
+
+/* shutdown */
+struct xfs_health_monitor_shutdown {
+ /* XFS_HEALTH_SHUTDOWN_* flags */
+ __u32 reasons;
+};
+
+/* file range events */
+struct xfs_health_monitor_filerange {
+ __u64 pos;
+ __u64 len;
+ __u64 ino;
+ __u32 gen;
+ __u32 error;
+};
+
+/* disk media errors */
+struct xfs_health_monitor_media {
+ __u64 daddr;
+ __u64 bbcount;
+};
+
+struct xfs_health_monitor_event {
+ /* XFS_HEALTH_MONITOR_DOMAIN_* */
+ __u32 domain;
+
+ /* XFS_HEALTH_MONITOR_TYPE_* */
+ __u32 type;
+
+ /* Timestamp of the event, in nanoseconds since the Unix epoch */
+ __u64 time_ns;
+
+ /*
+ * Details of the event. The primary clients are written in Python
+ * and Rust, so each payload is a separately named struct because
+ * bindgen hates anonymous structs and unions.
+ */
+ union {
+ struct xfs_health_monitor_lost lost;
+ struct xfs_health_monitor_fs fs;
+ struct xfs_health_monitor_group group;
+ struct xfs_health_monitor_inode inode;
+ struct xfs_health_monitor_shutdown shutdown;
+ struct xfs_health_monitor_media media;
+ struct xfs_health_monitor_filerange filerange;
+ } e;
+
+ /* zeroes */
+ __u64 pad[2];
+};
+
+struct xfs_health_monitor {
+ __u64 flags; /* flags */
+ __u8 format; /* output format */
+ __u8 pad[23]; /* zeroes */
+};
+
+/* Return all health status events, not just deltas */
+#define XFS_HEALTH_MONITOR_VERBOSE (1ULL << 0)
+
+#define XFS_HEALTH_MONITOR_ALL (XFS_HEALTH_MONITOR_VERBOSE)
+
+/* Initial return format version */
+#define XFS_HEALTH_MONITOR_FMT_V0 (0)
+
+/*
+ * Check that a given fd points to the same filesystem that the health monitor
+ * is monitoring.
+ */
+struct xfs_health_file_on_monitored_fs {
+ __s32 fd;
+ __u32 flags; /* zero for now */
+};
+
+/* Verify the media of the underlying devices */
+struct xfs_verify_media {
+ __u32 me_dev; /* I: XFS_DEV_{DATA,LOG,RT} */
+ __u32 me_flags; /* I: XFS_VERIFY_MEDIA_* */
+
+ /*
+ * IO: inclusive start of disk range to verify, in 512b blocks.
+ * Will be adjusted upwards as media verification succeeds.
+ */
+ __u64 me_start_daddr;
+
+ /*
+ * IO: exclusive end of the disk range to verify, in 512b blocks.
+ * Can be adjusted downwards to match device size.
+ */
+ __u64 me_end_daddr;
+
+ __u32 me_ioerror; /* O: I/O error (positive) */
+ __u32 me_max_io_size; /* I: maximum IO size in bytes */
+
+ __u32 me_rest_us; /* I: rest time between IOs, usecs */
+ __u32 me_pad; /* zero */
+};
+
+#define XFS_VERIFY_MEDIA_REPORT (1 << 0) /* report to fsnotify */
+
+#define XFS_VERIFY_MEDIA_FLAGS (XFS_VERIFY_MEDIA_REPORT)
+
/*
* ioctl commands that are used by Linux filesystems
*/
@@ -1042,6 +1227,10 @@ struct xfs_rtgroup_geometry {
#define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle)
#define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head)
#define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 65, struct xfs_rtgroup_geometry)
+#define XFS_IOC_HEALTH_MONITOR _IOW ('X', 68, struct xfs_health_monitor)
+#define XFS_IOC_HEALTH_FD_ON_MONITORED_FS \
+ _IOW ('X', 69, struct xfs_health_file_on_monitored_fs)
+#define XFS_IOC_VERIFY_MEDIA _IOWR('X', 70, struct xfs_verify_media)
/*
* ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/libxfs/xfs_group.c b/fs/xfs/libxfs/xfs_group.c
index 792f76d2e2a0..2ff9d1e56b47 100644
--- a/fs/xfs/libxfs/xfs_group.c
+++ b/fs/xfs/libxfs/xfs_group.c
@@ -3,7 +3,7 @@
* Copyright (c) 2018 Red Hat, Inc.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index b31000f7190c..1d45cf5789e8 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -289,4 +289,9 @@ void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs);
#define xfs_metadata_is_sick(error) \
(unlikely((error) == -EFSCORRUPTED || (error) == -EFSBADCRC))
+unsigned int xfs_healthmon_inode_mask(unsigned int sick_mask);
+unsigned int xfs_healthmon_rtgroup_mask(unsigned int sick_mask);
+unsigned int xfs_healthmon_perag_mask(unsigned int sick_mask);
+unsigned int xfs_healthmon_fs_mask(unsigned int sick_mask);
+
#endif /* __XFS_HEALTH_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index c19d6d713780..dcef06ec0a02 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 100afdd66cdd..1376e8630449 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 8796f2b3e534..5b2b926ab228 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -3,7 +3,7 @@
* Copyright (c) 2017 Christoph Hellwig.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_bit.h"
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index b1812b2c3cce..a017016e9075 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 1772d82f2d68..d14a7f2f4c03 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -4,7 +4,7 @@
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c
index 309ce6dd5553..551fa51befb6 100644
--- a/fs/xfs/libxfs/xfs_inode_util.c
+++ b/fs/xfs/libxfs/xfs_inode_util.c
@@ -4,7 +4,7 @@
* All Rights Reserved.
*/
#include <linux/iversion.h>
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 908e7060428c..3f5a24dda907 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -184,13 +184,6 @@ struct xlog_rec_header {
#define XLOG_REC_SIZE_OTHER offsetofend(struct xlog_rec_header, h_size)
#endif /* __i386__ */
-/* not an on-disk structure, but needed by log recovery in userspace */
-struct xfs_log_iovec {
- void *i_addr; /* beginning address of region */
- int i_len; /* length in bytes of region */
- uint i_type; /* type of region */
-};
-
/*
* Transaction Header definitions.
*
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
index 34bba96d30ca..37712b2f8757 100644
--- a/fs/xfs/libxfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
@@ -3,7 +3,7 @@
* Copyright (c) 2013 Jie Liu.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_metadir.c b/fs/xfs/libxfs/xfs_metadir.c
index 178e89711cb7..3e5c61188927 100644
--- a/fs/xfs/libxfs/xfs_metadir.c
+++ b/fs/xfs/libxfs/xfs_metadir.c
@@ -3,7 +3,7 @@
* Copyright (c) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c
index b02e3d6c0868..cf239f862212 100644
--- a/fs/xfs/libxfs/xfs_metafile.c
+++ b/fs/xfs/libxfs/xfs_metafile.c
@@ -3,7 +3,7 @@
* Copyright (c) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c
index 69366c44a701..3509cc4b2175 100644
--- a/fs/xfs/libxfs/xfs_parent.c
+++ b/fs/xfs/libxfs/xfs_parent.c
@@ -3,7 +3,7 @@
* Copyright (c) 2022-2024 Oracle.
* All rights reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_da_format.h"
@@ -29,6 +29,7 @@
#include "xfs_trans_space.h"
#include "xfs_attr_item.h"
#include "xfs_health.h"
+#include "xfs_attr_leaf.h"
struct kmem_cache *xfs_parent_args_cache;
@@ -202,8 +203,8 @@ xfs_parent_addname(
xfs_inode_to_parent_rec(&ppargs->rec, dp);
xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child,
child->i_ino, parent_name);
- xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_SET);
- return 0;
+
+ return xfs_attr_setname(&ppargs->args, 0);
}
/* Remove a parent pointer to reflect a dirent removal. */
@@ -224,8 +225,8 @@ xfs_parent_removename(
xfs_inode_to_parent_rec(&ppargs->rec, dp);
xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child,
child->i_ino, parent_name);
- xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REMOVE);
- return 0;
+
+ return xfs_attr_removename(&ppargs->args);
}
/* Replace one parent pointer with another to reflect a rename. */
@@ -250,12 +251,13 @@ xfs_parent_replacename(
child->i_ino, old_name);
xfs_inode_to_parent_rec(&ppargs->new_rec, new_dp);
+
ppargs->args.new_name = new_name->name;
ppargs->args.new_namelen = new_name->len;
ppargs->args.new_value = &ppargs->new_rec;
ppargs->args.new_valuelen = sizeof(struct xfs_parent_rec);
- xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REPLACE);
- return 0;
+
+ return xfs_attr_replacename(&ppargs->args, 0);
}
/*
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 2484dc9f6d7e..915ec85530e7 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -3,7 +3,7 @@
* Copyright (C) 2016 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 06da3ca14727..7e5f92c1ac56 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -3,7 +3,7 @@
* Copyright (C) 2016 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 83e0488ff773..e78133c908ca 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -3,7 +3,7 @@
* Copyright (c) 2014 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index bf16aee50d73..10b3272238eb 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -3,7 +3,7 @@
* Copyright (c) 2014 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 618061d898d4..bc4c0a99f4dd 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c
index be16efaa6925..09328f2d1575 100644
--- a/fs/xfs/libxfs/xfs_rtgroup.c
+++ b/fs/xfs/libxfs/xfs_rtgroup.c
@@ -3,7 +3,7 @@
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h
index 73cace4d25c7..c0b9f9f2c413 100644
--- a/fs/xfs/libxfs/xfs_rtgroup.h
+++ b/fs/xfs/libxfs/xfs_rtgroup.h
@@ -371,4 +371,19 @@ xfs_rtgs_to_rfsbs(
return xfs_groups_to_rfsbs(mp, nr_groups, XG_TYPE_RTG);
}
+/*
+ * Return the "raw" size of a group on the hardware device. This includes the
+ * daddr gaps present for XFS_SB_FEAT_INCOMPAT_ZONE_GAPS file systems.
+ */
+static inline xfs_rgblock_t
+xfs_rtgroup_raw_size(
+ struct xfs_mount *mp)
+{
+ struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG];
+
+ if (g->has_daddr_gaps)
+ return 1U << g->blklog;
+ return g->blocks;
+}
+
#endif /* __LIBXFS_RTGROUP_H */
diff --git a/fs/xfs/libxfs/xfs_rtrefcount_btree.c b/fs/xfs/libxfs/xfs_rtrefcount_btree.c
index ac11e94b42ae..c1518267eb17 100644
--- a/fs/xfs/libxfs/xfs_rtrefcount_btree.c
+++ b/fs/xfs/libxfs/xfs_rtrefcount_btree.c
@@ -3,7 +3,7 @@
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.c b/fs/xfs/libxfs/xfs_rtrmap_btree.c
index 55f903165769..00557b7ef298 100644
--- a/fs/xfs/libxfs/xfs_rtrmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rtrmap_btree.c
@@ -3,7 +3,7 @@
* Copyright (c) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 94c272a2ae26..38d16fe1f6d8 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index fb47a76ead18..f9a5966d8048 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -4,7 +4,7 @@
* Copyright (c) 2012-2013 Red Hat, Inc.
* All rights reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index c962ad64b0c1..1a0fdcbf39fa 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 86a111d0f2fc..3151e97ca8ff 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -4,7 +4,7 @@
* Copyright (C) 2010 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_trans_space.c b/fs/xfs/libxfs/xfs_trans_space.c
index b9dc3752f702..9b8f495c9049 100644
--- a/fs/xfs/libxfs/xfs_trans_space.c
+++ b/fs/xfs/libxfs/xfs_trans_space.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c
index 1faf04204c5d..67c947a47f14 100644
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -4,7 +4,7 @@
* Copyright (C) 2017 Oracle.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_shared.h"
diff --git a/fs/xfs/libxfs/xfs_zones.c b/fs/xfs/libxfs/xfs_zones.c
index b40f71f878b5..24e350c31933 100644
--- a/fs/xfs/libxfs/xfs_zones.c
+++ b/fs/xfs/libxfs/xfs_zones.c
@@ -3,7 +3,7 @@
* Copyright (c) 2023-2025 Christoph Hellwig.
* Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -15,173 +15,102 @@
#include "xfs_zones.h"
static bool
-xfs_zone_validate_empty(
+xfs_validate_blk_zone_seq(
+ struct xfs_mount *mp,
struct blk_zone *zone,
- struct xfs_rtgroup *rtg,
+ unsigned int zone_no,
xfs_rgblock_t *write_pointer)
{
- struct xfs_mount *mp = rtg_mount(rtg);
-
- if (rtg_rmap(rtg)->i_used_blocks > 0) {
- xfs_warn(mp, "empty zone %u has non-zero used counter (0x%x).",
- rtg_rgno(rtg), rtg_rmap(rtg)->i_used_blocks);
- return false;
- }
-
- *write_pointer = 0;
- return true;
-}
-
-static bool
-xfs_zone_validate_wp(
- struct blk_zone *zone,
- struct xfs_rtgroup *rtg,
- xfs_rgblock_t *write_pointer)
-{
- struct xfs_mount *mp = rtg_mount(rtg);
- xfs_rtblock_t wp_fsb = xfs_daddr_to_rtb(mp, zone->wp);
-
- if (rtg_rmap(rtg)->i_used_blocks > rtg->rtg_extents) {
- xfs_warn(mp, "zone %u has too large used counter (0x%x).",
- rtg_rgno(rtg), rtg_rmap(rtg)->i_used_blocks);
- return false;
- }
-
- if (xfs_rtb_to_rgno(mp, wp_fsb) != rtg_rgno(rtg)) {
- xfs_warn(mp, "zone %u write pointer (0x%llx) outside of zone.",
- rtg_rgno(rtg), wp_fsb);
- return false;
- }
-
- *write_pointer = xfs_rtb_to_rgbno(mp, wp_fsb);
- if (*write_pointer >= rtg->rtg_extents) {
- xfs_warn(mp, "zone %u has invalid write pointer (0x%x).",
- rtg_rgno(rtg), *write_pointer);
- return false;
- }
-
- return true;
-}
-
-static bool
-xfs_zone_validate_full(
- struct blk_zone *zone,
- struct xfs_rtgroup *rtg,
- xfs_rgblock_t *write_pointer)
-{
- struct xfs_mount *mp = rtg_mount(rtg);
-
- if (rtg_rmap(rtg)->i_used_blocks > rtg->rtg_extents) {
- xfs_warn(mp, "zone %u has too large used counter (0x%x).",
- rtg_rgno(rtg), rtg_rmap(rtg)->i_used_blocks);
- return false;
- }
-
- *write_pointer = rtg->rtg_extents;
- return true;
-}
-
-static bool
-xfs_zone_validate_seq(
- struct blk_zone *zone,
- struct xfs_rtgroup *rtg,
- xfs_rgblock_t *write_pointer)
-{
- struct xfs_mount *mp = rtg_mount(rtg);
-
switch (zone->cond) {
case BLK_ZONE_COND_EMPTY:
- return xfs_zone_validate_empty(zone, rtg, write_pointer);
+ *write_pointer = 0;
+ return true;
case BLK_ZONE_COND_IMP_OPEN:
case BLK_ZONE_COND_EXP_OPEN:
case BLK_ZONE_COND_CLOSED:
case BLK_ZONE_COND_ACTIVE:
- return xfs_zone_validate_wp(zone, rtg, write_pointer);
+ if (zone->wp < zone->start ||
+ zone->wp >= zone->start + zone->capacity) {
+ xfs_warn(mp,
+ "zone %u write pointer (%llu) outside of zone.",
+ zone_no, zone->wp);
+ return false;
+ }
+
+ *write_pointer = XFS_BB_TO_FSB(mp, zone->wp - zone->start);
+ return true;
case BLK_ZONE_COND_FULL:
- return xfs_zone_validate_full(zone, rtg, write_pointer);
+ *write_pointer = XFS_BB_TO_FSB(mp, zone->capacity);
+ return true;
case BLK_ZONE_COND_NOT_WP:
case BLK_ZONE_COND_OFFLINE:
case BLK_ZONE_COND_READONLY:
xfs_warn(mp, "zone %u has unsupported zone condition 0x%x.",
- rtg_rgno(rtg), zone->cond);
+ zone_no, zone->cond);
return false;
default:
xfs_warn(mp, "zone %u has unknown zone condition 0x%x.",
- rtg_rgno(rtg), zone->cond);
+ zone_no, zone->cond);
return false;
}
}
static bool
-xfs_zone_validate_conv(
+xfs_validate_blk_zone_conv(
+ struct xfs_mount *mp,
struct blk_zone *zone,
- struct xfs_rtgroup *rtg)
+ unsigned int zone_no)
{
- struct xfs_mount *mp = rtg_mount(rtg);
-
switch (zone->cond) {
case BLK_ZONE_COND_NOT_WP:
return true;
default:
xfs_warn(mp,
"conventional zone %u has unsupported zone condition 0x%x.",
- rtg_rgno(rtg), zone->cond);
+ zone_no, zone->cond);
return false;
}
}
bool
-xfs_zone_validate(
+xfs_validate_blk_zone(
+ struct xfs_mount *mp,
struct blk_zone *zone,
- struct xfs_rtgroup *rtg,
+ unsigned int zone_no,
+ uint32_t expected_size,
+ uint32_t expected_capacity,
xfs_rgblock_t *write_pointer)
{
- struct xfs_mount *mp = rtg_mount(rtg);
- struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG];
- uint32_t expected_size;
-
/*
* Check that the zone capacity matches the rtgroup size stored in the
* superblock. Note that all zones including the last one must have a
* uniform capacity.
*/
- if (XFS_BB_TO_FSB(mp, zone->capacity) != g->blocks) {
+ if (XFS_BB_TO_FSB(mp, zone->capacity) != expected_capacity) {
xfs_warn(mp,
-"zone %u capacity (0x%llx) does not match RT group size (0x%x).",
- rtg_rgno(rtg), XFS_BB_TO_FSB(mp, zone->capacity),
- g->blocks);
+"zone %u capacity (%llu) does not match RT group size (%u).",
+ zone_no, XFS_BB_TO_FSB(mp, zone->capacity),
+ expected_capacity);
return false;
}
- if (g->has_daddr_gaps) {
- expected_size = 1 << g->blklog;
- } else {
- if (zone->len != zone->capacity) {
- xfs_warn(mp,
-"zone %u has capacity != size ((0x%llx vs 0x%llx)",
- rtg_rgno(rtg),
- XFS_BB_TO_FSB(mp, zone->len),
- XFS_BB_TO_FSB(mp, zone->capacity));
- return false;
- }
- expected_size = g->blocks;
- }
-
if (XFS_BB_TO_FSB(mp, zone->len) != expected_size) {
xfs_warn(mp,
-"zone %u length (0x%llx) does match geometry (0x%x).",
- rtg_rgno(rtg), XFS_BB_TO_FSB(mp, zone->len),
+"zone %u length (%llu) does not match geometry (%u).",
+ zone_no, XFS_BB_TO_FSB(mp, zone->len),
expected_size);
+ return false;
}
switch (zone->type) {
case BLK_ZONE_TYPE_CONVENTIONAL:
- return xfs_zone_validate_conv(zone, rtg);
+ return xfs_validate_blk_zone_conv(mp, zone, zone_no);
case BLK_ZONE_TYPE_SEQWRITE_REQ:
- return xfs_zone_validate_seq(zone, rtg, write_pointer);
+ return xfs_validate_blk_zone_seq(mp, zone, zone_no,
+ write_pointer);
default:
xfs_warn(mp, "zoned %u has unsupported type 0x%x.",
- rtg_rgno(rtg), zone->type);
+ zone_no, zone->type);
return false;
}
}
diff --git a/fs/xfs/libxfs/xfs_zones.h b/fs/xfs/libxfs/xfs_zones.h
index 5fefd132e002..c16089c9a652 100644
--- a/fs/xfs/libxfs/xfs_zones.h
+++ b/fs/xfs/libxfs/xfs_zones.h
@@ -3,6 +3,7 @@
#define _LIBXFS_ZONES_H
struct xfs_rtgroup;
+struct blk_zone;
/*
* In order to guarantee forward progress for GC we need to reserve at least
@@ -36,7 +37,8 @@ struct xfs_rtgroup;
*/
#define XFS_DEFAULT_MAX_OPEN_ZONES 128
-bool xfs_zone_validate(struct blk_zone *zone, struct xfs_rtgroup *rtg,
- xfs_rgblock_t *write_pointer);
+bool xfs_validate_blk_zone(struct xfs_mount *mp, struct blk_zone *zone,
+ unsigned int zone_no, uint32_t expected_size,
+ uint32_t expected_capacity, xfs_rgblock_t *write_pointer);
#endif /* _LIBXFS_ZONES_H */
diff --git a/fs/xfs/scrub/agb_bitmap.c b/fs/xfs/scrub/agb_bitmap.c
index 573e4e062754..0194e3aaa1fa 100644
--- a/fs/xfs/scrub/agb_bitmap.c
+++ b/fs/xfs/scrub/agb_bitmap.c
@@ -3,7 +3,7 @@
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_bit.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 303374df44bd..7ffe4b0ef0f1 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index cd6f0223879f..15d58eedb387 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -3,7 +3,7 @@
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -837,8 +837,12 @@ xrep_agi_buf_cleanup(
{
struct xrep_agi *ragi = buf;
- xfarray_destroy(ragi->iunlink_prev);
- xfarray_destroy(ragi->iunlink_next);
+ if (ragi->iunlink_prev)
+ xfarray_destroy(ragi->iunlink_prev);
+ ragi->iunlink_prev = NULL;
+ if (ragi->iunlink_next)
+ xfarray_destroy(ragi->iunlink_next);
+ ragi->iunlink_next = NULL;
xagino_bitmap_destroy(&ragi->iunlink_bmp);
}
@@ -1708,7 +1712,6 @@ xrep_agi(
{
struct xrep_agi *ragi;
struct xfs_mount *mp = sc->mp;
- char *descr;
unsigned int i;
int error;
@@ -1742,17 +1745,13 @@ xrep_agi(
xagino_bitmap_init(&ragi->iunlink_bmp);
sc->buf_cleanup = xrep_agi_buf_cleanup;
- descr = xchk_xfile_ag_descr(sc, "iunlinked next pointers");
- error = xfarray_create(descr, 0, sizeof(xfs_agino_t),
- &ragi->iunlink_next);
- kfree(descr);
+ error = xfarray_create("iunlinked next pointers", 0,
+ sizeof(xfs_agino_t), &ragi->iunlink_next);
if (error)
return error;
- descr = xchk_xfile_ag_descr(sc, "iunlinked prev pointers");
- error = xfarray_create(descr, 0, sizeof(xfs_agino_t),
- &ragi->iunlink_prev);
- kfree(descr);
+ error = xfarray_create("iunlinked prev pointers", 0,
+ sizeof(xfs_agino_t), &ragi->iunlink_prev);
if (error)
return error;
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 8b282138097f..48edaa2cb1e0 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/alloc_repair.c b/fs/xfs/scrub/alloc_repair.c
index bed6a09aa791..5b4c2a39a155 100644
--- a/fs/xfs/scrub/alloc_repair.c
+++ b/fs/xfs/scrub/alloc_repair.c
@@ -3,7 +3,7 @@
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -850,7 +850,6 @@ xrep_allocbt(
struct xrep_abt *ra;
struct xfs_mount *mp = sc->mp;
unsigned int busy_gen;
- char *descr;
int error;
/* We require the rmapbt to rebuild anything. */
@@ -876,11 +875,9 @@ xrep_allocbt(
}
/* Set up enough storage to handle maximally fragmented free space. */
- descr = xchk_xfile_ag_descr(sc, "free space records");
- error = xfarray_create(descr, mp->m_sb.sb_agblocks / 2,
+ error = xfarray_create("free space records", mp->m_sb.sb_agblocks / 2,
sizeof(struct xfs_alloc_rec_incore),
&ra->free_records);
- kfree(descr);
if (error)
goto out_ra;
@@ -926,7 +923,22 @@ xrep_revalidate_allocbt(
if (error)
goto out;
+ /*
+ * If the bnobt is still corrupt, we've failed to repair the filesystem
+ * and should just bail out.
+ *
+ * If the bnobt fails cross-examination with the cntbt, the scan will
+ * free the cntbt cursor, so we need to mark the repair incomplete
+ * and avoid walking off the end of the NULL cntbt cursor.
+ */
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ goto out;
+
sc->sm->sm_type = XFS_SCRUB_TYPE_CNTBT;
+ if (!sc->sa.cnt_cur) {
+ xchk_set_incomplete(sc);
+ goto out;
+ }
error = xchk_allocbt(sc);
out:
sc->sm->sm_type = old_type;
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 708334f9b2bd..c3c122ea2d32 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -288,32 +288,6 @@ xchk_xattr_set_map(
}
/*
- * Check the leaf freemap from the usage bitmap. Returns false if the
- * attr freemap has problems or points to used space.
- */
-STATIC bool
-xchk_xattr_check_freemap(
- struct xfs_scrub *sc,
- struct xfs_attr3_icleaf_hdr *leafhdr)
-{
- struct xchk_xattr_buf *ab = sc->buf;
- unsigned int mapsize = sc->mp->m_attr_geo->blksize;
- int i;
-
- /* Construct bitmap of freemap contents. */
- bitmap_zero(ab->freemap, mapsize);
- for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
- if (!xchk_xattr_set_map(sc, ab->freemap,
- leafhdr->freemap[i].base,
- leafhdr->freemap[i].size))
- return false;
- }
-
- /* Look for bits that are set in freemap and are marked in use. */
- return !bitmap_intersects(ab->freemap, ab->usedmap, mapsize);
-}
-
-/*
* Check this leaf entry's relations to everything else.
* Returns the number of bytes used for the name/value data.
*/
@@ -364,7 +338,10 @@ xchk_xattr_entry(
rentry = xfs_attr3_leaf_name_remote(leaf, idx);
namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
name_end = (char *)rentry + namesize;
- if (rentry->namelen == 0 || rentry->valueblk == 0)
+ if (rentry->namelen == 0)
+ xchk_da_set_corrupt(ds, level);
+ if (rentry->valueblk == 0 &&
+ !(ent->flags & XFS_ATTR_INCOMPLETE))
xchk_da_set_corrupt(ds, level);
}
if (name_end > buf_end)
@@ -403,6 +380,7 @@ xchk_xattr_block(
*last_checked = blk->blkno;
bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize);
+ bitmap_zero(ab->freemap, mp->m_attr_geo->blksize);
/* Check all the padding. */
if (xfs_has_crc(ds->sc->mp)) {
@@ -449,6 +427,9 @@ xchk_xattr_block(
if ((char *)&entries[leafhdr.count] > (char *)leaf + leafhdr.firstused)
xchk_da_set_corrupt(ds, level);
+ if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ goto out;
+
buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
/* Mark the leaf entry itself. */
@@ -467,7 +448,29 @@ xchk_xattr_block(
goto out;
}
- if (!xchk_xattr_check_freemap(ds->sc, &leafhdr))
+ /* Construct bitmap of freemap contents. */
+ for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
+ if (!xchk_xattr_set_map(ds->sc, ab->freemap,
+ leafhdr.freemap[i].base,
+ leafhdr.freemap[i].size))
+ xchk_da_set_corrupt(ds, level);
+
+ /*
+ * freemap entries with zero length and nonzero base can cause
+ * problems with older kernels, so we mark these for preening
+ * even though there's no inconsistency.
+ */
+ if (leafhdr.freemap[i].size == 0 &&
+ leafhdr.freemap[i].base > 0)
+ xchk_da_set_preen(ds, level);
+
+ if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ goto out;
+ }
+
+ /* Look for bits that are set in freemap and are marked in use. */
+ if (bitmap_intersects(ab->freemap, ab->usedmap,
+ mp->m_attr_geo->blksize))
xchk_da_set_corrupt(ds, level);
if (leafhdr.usedbytes != usedbytes)
diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c
index 09d63aa10314..a924b467a844 100644
--- a/fs/xfs/scrub/attr_repair.c
+++ b/fs/xfs/scrub/attr_repair.c
@@ -3,7 +3,7 @@
* Copyright (c) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -1516,8 +1516,10 @@ xrep_xattr_teardown(
xfblob_destroy(rx->pptr_names);
if (rx->pptr_recs)
xfarray_destroy(rx->pptr_recs);
- xfblob_destroy(rx->xattr_blobs);
- xfarray_destroy(rx->xattr_records);
+ if (rx->xattr_blobs)
+ xfblob_destroy(rx->xattr_blobs);
+ if (rx->xattr_records)
+ xfarray_destroy(rx->xattr_records);
mutex_destroy(&rx->lock);
kfree(rx);
}
@@ -1529,7 +1531,6 @@ xrep_xattr_setup_scan(
struct xrep_xattr **rxp)
{
struct xrep_xattr *rx;
- char *descr;
int max_len;
int error;
@@ -1555,35 +1556,26 @@ xrep_xattr_setup_scan(
goto out_rx;
/* Set up some staging for salvaged attribute keys and values */
- descr = xchk_xfile_ino_descr(sc, "xattr keys");
- error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key),
+ error = xfarray_create("xattr keys", 0, sizeof(struct xrep_xattr_key),
&rx->xattr_records);
- kfree(descr);
if (error)
goto out_rx;
- descr = xchk_xfile_ino_descr(sc, "xattr names");
- error = xfblob_create(descr, &rx->xattr_blobs);
- kfree(descr);
+ error = xfblob_create("xattr names", &rx->xattr_blobs);
if (error)
goto out_keys;
if (xfs_has_parent(sc->mp)) {
ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
- descr = xchk_xfile_ino_descr(sc,
- "xattr retained parent pointer entries");
- error = xfarray_create(descr, 0,
+ error = xfarray_create("xattr parent pointer entries", 0,
sizeof(struct xrep_xattr_pptr),
&rx->pptr_recs);
- kfree(descr);
if (error)
goto out_values;
- descr = xchk_xfile_ino_descr(sc,
- "xattr retained parent pointer names");
- error = xfblob_create(descr, &rx->pptr_names);
- kfree(descr);
+ error = xfblob_create("xattr parent pointer names",
+ &rx->pptr_names);
if (error)
goto out_pprecs;
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index 7ba35a7a7920..51f3171bc6c8 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -3,7 +3,7 @@
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_bit.h"
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 4f1e2574660d..d40534bf9ef9 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c
index 1084213b8e9b..0a83d5845379 100644
--- a/fs/xfs/scrub/bmap_repair.c
+++ b/fs/xfs/scrub/bmap_repair.c
@@ -3,7 +3,7 @@
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -923,7 +923,6 @@ xrep_bmap(
bool allow_unwritten)
{
struct xrep_bmap *rb;
- char *descr;
xfs_extnum_t max_bmbt_recs;
bool large_extcount;
int error = 0;
@@ -945,11 +944,8 @@ xrep_bmap(
/* Set up enough storage to handle the max records for this fork. */
large_extcount = xfs_has_large_extent_counts(sc->mp);
max_bmbt_recs = xfs_iext_max_nextents(large_extcount, whichfork);
- descr = xchk_xfile_ino_descr(sc, "%s fork mapping records",
- whichfork == XFS_DATA_FORK ? "data" : "attr");
- error = xfarray_create(descr, max_bmbt_recs,
+ error = xfarray_create("fork mapping records", max_bmbt_recs,
sizeof(struct xfs_bmbt_rec), &rb->bmap_records);
- kfree(descr);
if (error)
goto out_rb;
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index cd6f0ff382a7..1089b1f4c5df 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -42,6 +42,8 @@ __xchk_btree_process_error(
break;
case -EFSBADCRC:
case -EFSCORRUPTED:
+ case -EIO:
+ case -ENODATA:
/* Note the badness but don't abort. */
sc->sm->sm_flags |= errflag;
*error = 0;
@@ -370,12 +372,15 @@ xchk_btree_check_block_owner(
{
xfs_agnumber_t agno;
xfs_agblock_t agbno;
+ bool is_bnobt, is_rmapbt;
bool init_sa;
int error = 0;
if (!bs->cur)
return 0;
+ is_bnobt = xfs_btree_is_bno(bs->cur->bc_ops);
+ is_rmapbt = xfs_btree_is_rmap(bs->cur->bc_ops);
agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
@@ -398,11 +403,11 @@ xchk_btree_check_block_owner(
* have to nullify it (to shut down further block owner checks) if
* self-xref encounters problems.
*/
- if (!bs->sc->sa.bno_cur && xfs_btree_is_bno(bs->cur->bc_ops))
+ if (!bs->sc->sa.bno_cur && is_bnobt)
bs->cur = NULL;
xchk_xref_is_only_owned_by(bs->sc, agbno, 1, bs->oinfo);
- if (!bs->sc->sa.rmap_cur && xfs_btree_is_rmap(bs->cur->bc_ops))
+ if (!bs->sc->sa.rmap_cur && is_rmapbt)
bs->cur = NULL;
out_free:
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 7bfa37c99480..20e63069088b 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -103,6 +103,8 @@ __xchk_process_error(
break;
case -EFSBADCRC:
case -EFSCORRUPTED:
+ case -EIO:
+ case -ENODATA:
/* Note the badness but don't abort. */
sc->sm->sm_flags |= errflag;
*error = 0;
@@ -177,6 +179,8 @@ __xchk_fblock_process_error(
break;
case -EFSBADCRC:
case -EFSCORRUPTED:
+ case -EIO:
+ case -ENODATA:
/* Note the badness but don't abort. */
sc->sm->sm_flags |= errflag;
*error = 0;
@@ -1395,6 +1399,9 @@ xchk_metadata_inode_subtype(
int error;
sub = xchk_scrub_create_subord(sc, scrub_type);
+ if (!sub)
+ return -ENOMEM;
+
error = sub->sc.ops->scrub(&sub->sc);
xchk_scrub_free_subord(sub);
return error;
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index ddbc065c798c..f2ecc68538f0 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -247,31 +247,6 @@ static inline bool xchk_could_repair(const struct xfs_scrub *sc)
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
/*
- * Helper macros to allocate and format xfile description strings.
- * Callers must kfree the pointer returned.
- */
-#define xchk_xfile_descr(sc, fmt, ...) \
- kasprintf(XCHK_GFP_FLAGS, "XFS (%s): " fmt, \
- (sc)->mp->m_super->s_id, ##__VA_ARGS__)
-#define xchk_xfile_ag_descr(sc, fmt, ...) \
- kasprintf(XCHK_GFP_FLAGS, "XFS (%s): AG 0x%x " fmt, \
- (sc)->mp->m_super->s_id, \
- (sc)->sa.pag ? \
- pag_agno((sc)->sa.pag) : (sc)->sm->sm_agno, \
- ##__VA_ARGS__)
-#define xchk_xfile_ino_descr(sc, fmt, ...) \
- kasprintf(XCHK_GFP_FLAGS, "XFS (%s): inode 0x%llx " fmt, \
- (sc)->mp->m_super->s_id, \
- (sc)->ip ? (sc)->ip->i_ino : (sc)->sm->sm_ino, \
- ##__VA_ARGS__)
-#define xchk_xfile_rtgroup_descr(sc, fmt, ...) \
- kasprintf(XCHK_GFP_FLAGS, "XFS (%s): rtgroup 0x%x " fmt, \
- (sc)->mp->m_super->s_id, \
- (sc)->sa.pag ? \
- rtg_rgno((sc)->sr.rtg) : (sc)->sm->sm_agno, \
- ##__VA_ARGS__)
-
-/*
* Setting up a hook to wait for intents to drain is costly -- we have to take
* the CPU hotplug lock and force an i-cache flush on all CPUs once to set it
* up, and again to tear it down. These costs add up quickly, so we only want
diff --git a/fs/xfs/scrub/cow_repair.c b/fs/xfs/scrub/cow_repair.c
index b2a83801412e..33749cf43520 100644
--- a/fs/xfs/scrub/cow_repair.c
+++ b/fs/xfs/scrub/cow_repair.c
@@ -3,7 +3,7 @@
* Copyright (C) 2022-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index 056de4819f86..5858d4d5e279 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -45,6 +45,8 @@ xchk_da_process_error(
break;
case -EFSBADCRC:
case -EFSCORRUPTED:
+ case -EIO:
+ case -ENODATA:
/* Note the badness but don't abort. */
sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
*error = 0;
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index c877bde71e62..91228623d016 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -1102,22 +1102,17 @@ xchk_directory(
sd->xname.name = sd->namebuf;
if (xfs_has_parent(sc->mp)) {
- char *descr;
-
/*
* Set up some staging memory for dirents that we can't check
* due to locking contention.
*/
- descr = xchk_xfile_ino_descr(sc, "slow directory entries");
- error = xfarray_create(descr, 0, sizeof(struct xchk_dirent),
- &sd->dir_entries);
- kfree(descr);
+ error = xfarray_create("slow directory entries", 0,
+ sizeof(struct xchk_dirent), &sd->dir_entries);
if (error)
goto out_sd;
- descr = xchk_xfile_ino_descr(sc, "slow directory entry names");
- error = xfblob_create(descr, &sd->dir_names);
- kfree(descr);
+ error = xfblob_create("slow directory entry names",
+ &sd->dir_names);
if (error)
goto out_entries;
}
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index 8d3b550990b5..f105e49f654b 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -3,7 +3,7 @@
* Copyright (c) 2020-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -172,8 +172,12 @@ xrep_dir_teardown(
struct xrep_dir *rd = sc->buf;
xrep_findparent_scan_teardown(&rd->pscan);
- xfblob_destroy(rd->dir_names);
- xfarray_destroy(rd->dir_entries);
+ if (rd->dir_names)
+ xfblob_destroy(rd->dir_names);
+ rd->dir_names = NULL;
+ if (rd->dir_entries)
+ xfarray_destroy(rd->dir_entries);
+ rd->dir_entries = NULL;
}
/* Set up for a directory repair. */
@@ -1784,20 +1788,15 @@ xrep_dir_setup_scan(
struct xrep_dir *rd)
{
struct xfs_scrub *sc = rd->sc;
- char *descr;
int error;
/* Set up some staging memory for salvaging dirents. */
- descr = xchk_xfile_ino_descr(sc, "directory entries");
- error = xfarray_create(descr, 0, sizeof(struct xrep_dirent),
- &rd->dir_entries);
- kfree(descr);
+ error = xfarray_create("directory entries", 0,
+ sizeof(struct xrep_dirent), &rd->dir_entries);
if (error)
return error;
- descr = xchk_xfile_ino_descr(sc, "directory entry names");
- error = xfblob_create(descr, &rd->dir_names);
- kfree(descr);
+ error = xfblob_create("directory entry names", &rd->dir_names);
if (error)
goto out_xfarray;
diff --git a/fs/xfs/scrub/dirtree.c b/fs/xfs/scrub/dirtree.c
index 3a9cdf8738b6..e95dc74f1145 100644
--- a/fs/xfs/scrub/dirtree.c
+++ b/fs/xfs/scrub/dirtree.c
@@ -3,7 +3,7 @@
* Copyright (c) 2023-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -81,8 +81,12 @@ xchk_dirtree_buf_cleanup(
kfree(path);
}
- xfblob_destroy(dl->path_names);
- xfarray_destroy(dl->path_steps);
+ if (dl->path_names)
+ xfblob_destroy(dl->path_names);
+ dl->path_names = NULL;
+ if (dl->path_steps)
+ xfarray_destroy(dl->path_steps);
+ dl->path_steps = NULL;
mutex_destroy(&dl->lock);
}
@@ -92,7 +96,6 @@ xchk_setup_dirtree(
struct xfs_scrub *sc)
{
struct xchk_dirtree *dl;
- char *descr;
int error;
xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
@@ -116,16 +119,12 @@ xchk_setup_dirtree(
mutex_init(&dl->lock);
- descr = xchk_xfile_ino_descr(sc, "dirtree path steps");
- error = xfarray_create(descr, 0, sizeof(struct xchk_dirpath_step),
- &dl->path_steps);
- kfree(descr);
+ error = xfarray_create("dirtree path steps", 0,
+ sizeof(struct xchk_dirpath_step), &dl->path_steps);
if (error)
goto out_dl;
- descr = xchk_xfile_ino_descr(sc, "dirtree path names");
- error = xfblob_create(descr, &dl->path_names);
- kfree(descr);
+ error = xfblob_create("dirtree path names", &dl->path_names);
if (error)
goto out_steps;
diff --git a/fs/xfs/scrub/dirtree_repair.c b/fs/xfs/scrub/dirtree_repair.c
index 5c04e70ba951..019feaf0d606 100644
--- a/fs/xfs/scrub/dirtree_repair.c
+++ b/fs/xfs/scrub/dirtree_repair.c
@@ -3,7 +3,7 @@
* Copyright (c) 2023-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/dqiterate.c b/fs/xfs/scrub/dqiterate.c
index 20c4daedd48d..10950e4bd4c3 100644
--- a/fs/xfs/scrub/dqiterate.c
+++ b/fs/xfs/scrub/dqiterate.c
@@ -3,7 +3,7 @@
* Copyright (C) 2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_bit.h"
diff --git a/fs/xfs/scrub/findparent.c b/fs/xfs/scrub/findparent.c
index 84487072b6dd..2076f028d271 100644
--- a/fs/xfs/scrub/findparent.c
+++ b/fs/xfs/scrub/findparent.c
@@ -3,7 +3,7 @@
* Copyright (c) 2020-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index cebd0d526926..b35f65b537ba 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -3,7 +3,7 @@
* Copyright (C) 2019-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/fscounters_repair.c b/fs/xfs/scrub/fscounters_repair.c
index f0d2b04644e4..783e409f8f3c 100644
--- a/fs/xfs/scrub/fscounters_repair.c
+++ b/fs/xfs/scrub/fscounters_repair.c
@@ -3,7 +3,7 @@
* Copyright (c) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index 3c0f25098b69..2171bcf0f6c1 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -3,7 +3,7 @@
* Copyright (C) 2019-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 4dc7c83dc08a..911dc0f9a79d 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/ialloc_repair.c b/fs/xfs/scrub/ialloc_repair.c
index 14e48d3f1912..9b63b9d19e1b 100644
--- a/fs/xfs/scrub/ialloc_repair.c
+++ b/fs/xfs/scrub/ialloc_repair.c
@@ -3,7 +3,7 @@
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -797,7 +797,6 @@ xrep_iallocbt(
{
struct xrep_ibt *ri;
struct xfs_mount *mp = sc->mp;
- char *descr;
xfs_agino_t first_agino, last_agino;
int error = 0;
@@ -816,11 +815,9 @@ xrep_iallocbt(
/* Set up enough storage to handle an AG with nothing but inodes. */
xfs_agino_range(mp, pag_agno(sc->sa.pag), &first_agino, &last_agino);
last_agino /= XFS_INODES_PER_CHUNK;
- descr = xchk_xfile_ag_descr(sc, "inode index records");
- error = xfarray_create(descr, last_agino,
+ error = xfarray_create("inode index records", last_agino,
sizeof(struct xfs_inobt_rec_incore),
&ri->inode_records);
- kfree(descr);
if (error)
goto out_ri;
@@ -866,10 +863,24 @@ xrep_revalidate_iallocbt(
if (error)
goto out;
- if (xfs_has_finobt(sc->mp)) {
- sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT;
- error = xchk_iallocbt(sc);
+ /*
+ * If the inobt is still corrupt, we've failed to repair the filesystem
+ * and should just bail out.
+ *
+ * If the inobt fails cross-examination with the finobt, the scan will
+ * free the finobt cursor, so we need to mark the repair incomplete
+ * and avoid walking off the end of the NULL finobt cursor.
+ */
+ if (!xfs_has_finobt(sc->mp) ||
+ (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+ goto out;
+
+ sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT;
+ if (!sc->sa.fino_cur) {
+ xchk_set_incomplete(sc);
+ goto out;
}
+ error = xchk_iallocbt(sc);
out:
sc->sm->sm_type = old_type;
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index bb3f475b6353..948d04dcba2a 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
index 4f7040c9ddf0..bf182a18f115 100644
--- a/fs/xfs/scrub/inode_repair.c
+++ b/fs/xfs/scrub/inode_repair.c
@@ -3,7 +3,7 @@
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/iscan.c b/fs/xfs/scrub/iscan.c
index 84f117667ca2..2a974eed00cc 100644
--- a/fs/xfs/scrub/iscan.c
+++ b/fs/xfs/scrub/iscan.c
@@ -3,7 +3,7 @@
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/listxattr.c b/fs/xfs/scrub/listxattr.c
index 256ff7700c94..0863db64b1b2 100644
--- a/fs/xfs/scrub/listxattr.c
+++ b/fs/xfs/scrub/listxattr.c
@@ -3,7 +3,7 @@
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c
index 378ec7c8d38e..3d9de59c1758 100644
--- a/fs/xfs/scrub/metapath.c
+++ b/fs/xfs/scrub/metapath.c
@@ -3,7 +3,7 @@
* Copyright (c) 2023-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c
index 951ae8b71566..43e868f829aa 100644
--- a/fs/xfs/scrub/newbt.c
+++ b/fs/xfs/scrub/newbt.c
@@ -3,7 +3,7 @@
* Copyright (C) 2022-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c
index 091c79e432e5..e80fe7395d78 100644
--- a/fs/xfs/scrub/nlinks.c
+++ b/fs/xfs/scrub/nlinks.c
@@ -3,7 +3,7 @@
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -971,7 +971,8 @@ xchk_nlinks_teardown_scan(
xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook);
- xfarray_destroy(xnc->nlinks);
+ if (xnc->nlinks)
+ xfarray_destroy(xnc->nlinks);
xnc->nlinks = NULL;
xchk_iscan_teardown(&xnc->collect_iscan);
@@ -990,7 +991,6 @@ xchk_nlinks_setup_scan(
struct xchk_nlink_ctrs *xnc)
{
struct xfs_mount *mp = sc->mp;
- char *descr;
unsigned long long max_inos;
xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1;
xfs_agino_t first_agino, last_agino;
@@ -1007,10 +1007,9 @@ xchk_nlinks_setup_scan(
*/
xfs_agino_range(mp, last_agno, &first_agino, &last_agino);
max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1;
- descr = xchk_xfile_descr(sc, "file link counts");
- error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos),
+ error = xfarray_create("file link counts",
+ min(XFS_MAXINUMBER + 1, max_inos),
sizeof(struct xchk_nlink), &xnc->nlinks);
- kfree(descr);
if (error)
goto out_teardown;
diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c
index 6ef2ee9c3814..9049215c6eae 100644
--- a/fs/xfs/scrub/nlinks_repair.c
+++ b/fs/xfs/scrub/nlinks_repair.c
@@ -3,7 +3,7 @@
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c
index 4e550a1d5353..52a108f6d5f4 100644
--- a/fs/xfs/scrub/orphanage.c
+++ b/fs/xfs/scrub/orphanage.c
@@ -3,7 +3,7 @@
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 11d5de10fd56..5a259570b154 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -755,7 +755,6 @@ xchk_parent_pptr(
struct xfs_scrub *sc)
{
struct xchk_pptrs *pp;
- char *descr;
int error;
pp = kvzalloc(sizeof(struct xchk_pptrs), XCHK_GFP_FLAGS);
@@ -768,16 +767,12 @@ xchk_parent_pptr(
* Set up some staging memory for parent pointers that we can't check
* due to locking contention.
*/
- descr = xchk_xfile_ino_descr(sc, "slow parent pointer entries");
- error = xfarray_create(descr, 0, sizeof(struct xchk_pptr),
- &pp->pptr_entries);
- kfree(descr);
+ error = xfarray_create("slow parent pointer entries", 0,
+ sizeof(struct xchk_pptr), &pp->pptr_entries);
if (error)
goto out_pp;
- descr = xchk_xfile_ino_descr(sc, "slow parent pointer names");
- error = xfblob_create(descr, &pp->pptr_names);
- kfree(descr);
+ error = xfblob_create("slow parent pointer names", &pp->pptr_names);
if (error)
goto out_entries;
diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c
index 2949feda6271..83a8205ae2f1 100644
--- a/fs/xfs/scrub/parent_repair.c
+++ b/fs/xfs/scrub/parent_repair.c
@@ -3,7 +3,7 @@
* Copyright (c) 2020-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -1497,7 +1497,6 @@ xrep_parent_setup_scan(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
- char *descr;
struct xfs_da_geometry *geo = sc->mp->m_attr_geo;
int max_len;
int error;
@@ -1525,32 +1524,22 @@ xrep_parent_setup_scan(
goto out_xattr_name;
/* Set up some staging memory for logging parent pointer updates. */
- descr = xchk_xfile_ino_descr(sc, "parent pointer entries");
- error = xfarray_create(descr, 0, sizeof(struct xrep_pptr),
- &rp->pptr_recs);
- kfree(descr);
+ error = xfarray_create("parent pointer entries", 0,
+ sizeof(struct xrep_pptr), &rp->pptr_recs);
if (error)
goto out_xattr_value;
- descr = xchk_xfile_ino_descr(sc, "parent pointer names");
- error = xfblob_create(descr, &rp->pptr_names);
- kfree(descr);
+ error = xfblob_create("parent pointer names", &rp->pptr_names);
if (error)
goto out_recs;
/* Set up some storage for copying attrs before the mapping exchange */
- descr = xchk_xfile_ino_descr(sc,
- "parent pointer retained xattr entries");
- error = xfarray_create(descr, 0, sizeof(struct xrep_parent_xattr),
- &rp->xattr_records);
- kfree(descr);
+ error = xfarray_create("parent pointer xattr entries", 0,
+ sizeof(struct xrep_parent_xattr), &rp->xattr_records);
if (error)
goto out_names;
- descr = xchk_xfile_ino_descr(sc,
- "parent pointer retained xattr values");
- error = xfblob_create(descr, &rp->xattr_blobs);
- kfree(descr);
+ error = xfblob_create("parent pointer xattr values", &rp->xattr_blobs);
if (error)
goto out_attr_keys;
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 5c5374c44c5a..1d25bd5b892e 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_bit.h"
diff --git a/fs/xfs/scrub/quota_repair.c b/fs/xfs/scrub/quota_repair.c
index b1d661aa5f06..487bd4f68ebb 100644
--- a/fs/xfs/scrub/quota_repair.c
+++ b/fs/xfs/scrub/quota_repair.c
@@ -3,7 +3,7 @@
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c
index d412a8359784..e8cba19334a0 100644
--- a/fs/xfs/scrub/quotacheck.c
+++ b/fs/xfs/scrub/quotacheck.c
@@ -3,7 +3,7 @@
* Copyright (c) 2020-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -741,7 +741,6 @@ xqcheck_setup_scan(
struct xfs_scrub *sc,
struct xqcheck *xqc)
{
- char *descr;
struct xfs_quotainfo *qi = sc->mp->m_quotainfo;
unsigned long long max_dquots = XFS_DQ_ID_MAX + 1ULL;
int error;
@@ -756,28 +755,22 @@ xqcheck_setup_scan(
error = -ENOMEM;
if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_USER)) {
- descr = xchk_xfile_descr(sc, "user dquot records");
- error = xfarray_create(descr, max_dquots,
+ error = xfarray_create("user dquot records", max_dquots,
sizeof(struct xqcheck_dquot), &xqc->ucounts);
- kfree(descr);
if (error)
goto out_teardown;
}
if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_GROUP)) {
- descr = xchk_xfile_descr(sc, "group dquot records");
- error = xfarray_create(descr, max_dquots,
+ error = xfarray_create("group dquot records", max_dquots,
sizeof(struct xqcheck_dquot), &xqc->gcounts);
- kfree(descr);
if (error)
goto out_teardown;
}
if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_PROJ)) {
- descr = xchk_xfile_descr(sc, "project dquot records");
- error = xfarray_create(descr, max_dquots,
+ error = xfarray_create("project dquot records", max_dquots,
sizeof(struct xqcheck_dquot), &xqc->pcounts);
- kfree(descr);
if (error)
goto out_teardown;
}
diff --git a/fs/xfs/scrub/quotacheck_repair.c b/fs/xfs/scrub/quotacheck_repair.c
index 51be8d8d261b..dbb522e1513b 100644
--- a/fs/xfs/scrub/quotacheck_repair.c
+++ b/fs/xfs/scrub/quotacheck_repair.c
@@ -3,7 +3,7 @@
* Copyright (c) 2020-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/rcbag.c b/fs/xfs/scrub/rcbag.c
index e1e52bc20713..c1a97a073d92 100644
--- a/fs/xfs/scrub/rcbag.c
+++ b/fs/xfs/scrub/rcbag.c
@@ -3,7 +3,7 @@
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/rcbag_btree.c b/fs/xfs/scrub/rcbag_btree.c
index 9a4ef823c5a7..367f8ccf55c4 100644
--- a/fs/xfs/scrub/rcbag_btree.c
+++ b/fs/xfs/scrub/rcbag_btree.c
@@ -3,7 +3,7 @@
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/readdir.c b/fs/xfs/scrub/readdir.c
index 01c9a2dc0f2c..c66ec9093a38 100644
--- a/fs/xfs/scrub/readdir.c
+++ b/fs/xfs/scrub/readdir.c
@@ -3,7 +3,7 @@
* Copyright (C) 2022-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c
index 07f5bb8a6421..fff23932828b 100644
--- a/fs/xfs/scrub/reap.c
+++ b/fs/xfs/scrub/reap.c
@@ -3,7 +3,7 @@
* Copyright (C) 2022-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index d46528023015..bf87025f24fc 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/refcount_repair.c b/fs/xfs/scrub/refcount_repair.c
index 9c8cb5332da0..507993e0fb0f 100644
--- a/fs/xfs/scrub/refcount_repair.c
+++ b/fs/xfs/scrub/refcount_repair.c
@@ -3,7 +3,7 @@
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -123,13 +123,7 @@ int
xrep_setup_ag_refcountbt(
struct xfs_scrub *sc)
{
- char *descr;
- int error;
-
- descr = xchk_xfile_ag_descr(sc, "rmap record bag");
- error = xrep_setup_xfbtree(sc, descr);
- kfree(descr);
- return error;
+ return xrep_setup_xfbtree(sc, "rmap record bag");
}
/* Check for any obvious conflicts with this shared/CoW staging extent. */
@@ -704,7 +698,6 @@ xrep_refcountbt(
{
struct xrep_refc *rr;
struct xfs_mount *mp = sc->mp;
- char *descr;
int error;
/* We require the rmapbt to rebuild anything. */
@@ -717,11 +710,9 @@ xrep_refcountbt(
rr->sc = sc;
/* Set up enough storage to handle one refcount record per block. */
- descr = xchk_xfile_ag_descr(sc, "reference count records");
- error = xfarray_create(descr, mp->m_sb.sb_agblocks,
+ error = xfarray_create("reference count records", mp->m_sb.sb_agblocks,
sizeof(struct xfs_refcount_irec),
&rr->refcount_records);
- kfree(descr);
if (error)
goto out_rr;
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index efd5a7ccdf62..ac8c592579bd 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -3,7 +3,7 @@
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -1136,6 +1136,9 @@ xrep_metadata_inode_subtype(
* setup/teardown routines.
*/
sub = xchk_scrub_create_subord(sc, scrub_type);
+ if (!sub)
+ return -ENOMEM;
+
error = sub->sc.ops->scrub(&sub->sc);
if (error)
goto out;
diff --git a/fs/xfs/scrub/rgsuper.c b/fs/xfs/scrub/rgsuper.c
index d189732d0e24..482f899a518a 100644
--- a/fs/xfs/scrub/rgsuper.c
+++ b/fs/xfs/scrub/rgsuper.c
@@ -3,7 +3,7 @@
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 39e9ad7cd8ae..2c25910e2903 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c
index 17d4a38d735c..ab7053e25e1c 100644
--- a/fs/xfs/scrub/rmap_repair.c
+++ b/fs/xfs/scrub/rmap_repair.c
@@ -3,7 +3,7 @@
* Copyright (c) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -164,14 +164,11 @@ xrep_setup_ag_rmapbt(
struct xfs_scrub *sc)
{
struct xrep_rmap *rr;
- char *descr;
int error;
xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);
- descr = xchk_xfile_ag_descr(sc, "reverse mapping records");
- error = xrep_setup_xfbtree(sc, descr);
- kfree(descr);
+ error = xrep_setup_xfbtree(sc, "reverse mapping records");
if (error)
return error;
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index d5ff8609dbfb..4bcfd99cec17 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/rtbitmap_repair.c b/fs/xfs/scrub/rtbitmap_repair.c
index 203a1a97c502..f4ca86a2ea1b 100644
--- a/fs/xfs/scrub/rtbitmap_repair.c
+++ b/fs/xfs/scrub/rtbitmap_repair.c
@@ -3,7 +3,7 @@
* Copyright (C) 2020-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -43,7 +43,6 @@ xrep_setup_rtbitmap(
struct xchk_rtbitmap *rtb)
{
struct xfs_mount *mp = sc->mp;
- char *descr;
unsigned long long blocks = mp->m_sb.sb_rbmblocks;
int error;
@@ -52,9 +51,8 @@ xrep_setup_rtbitmap(
return error;
/* Create an xfile to hold our reconstructed bitmap. */
- descr = xchk_xfile_rtgroup_descr(sc, "bitmap file");
- error = xfile_create(descr, blocks * mp->m_sb.sb_blocksize, &sc->xfile);
- kfree(descr);
+ error = xfile_create("realtime bitmap file",
+ blocks * mp->m_sb.sb_blocksize, &sc->xfile);
if (error)
return error;
diff --git a/fs/xfs/scrub/rtrefcount.c b/fs/xfs/scrub/rtrefcount.c
index 4c5dffc73641..8cfe2f120b6b 100644
--- a/fs/xfs/scrub/rtrefcount.c
+++ b/fs/xfs/scrub/rtrefcount.c
@@ -3,7 +3,7 @@
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/rtrefcount_repair.c b/fs/xfs/scrub/rtrefcount_repair.c
index 983362447826..f713daf095fb 100644
--- a/fs/xfs/scrub/rtrefcount_repair.c
+++ b/fs/xfs/scrub/rtrefcount_repair.c
@@ -3,7 +3,7 @@
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -128,13 +128,7 @@ int
xrep_setup_rtrefcountbt(
struct xfs_scrub *sc)
{
- char *descr;
- int error;
-
- descr = xchk_xfile_ag_descr(sc, "rmap record bag");
- error = xrep_setup_xfbtree(sc, descr);
- kfree(descr);
- return error;
+ return xrep_setup_xfbtree(sc, "realtime rmap record bag");
}
/* Check for any obvious conflicts with this shared/CoW staging extent. */
@@ -704,7 +698,6 @@ xrep_rtrefcountbt(
{
struct xrep_rtrefc *rr;
struct xfs_mount *mp = sc->mp;
- char *descr;
int error;
/* We require the rmapbt to rebuild anything. */
@@ -722,11 +715,9 @@ xrep_rtrefcountbt(
rr->sc = sc;
/* Set up enough storage to handle one refcount record per rt extent. */
- descr = xchk_xfile_ag_descr(sc, "reference count records");
- error = xfarray_create(descr, mp->m_sb.sb_rextents,
- sizeof(struct xfs_refcount_irec),
+ error = xfarray_create("realtime reference count records",
+ mp->m_sb.sb_rextents, sizeof(struct xfs_refcount_irec),
&rr->refcount_records);
- kfree(descr);
if (error)
goto out_rr;
diff --git a/fs/xfs/scrub/rtrmap.c b/fs/xfs/scrub/rtrmap.c
index 12989fe80e8b..8b1a8389d32f 100644
--- a/fs/xfs/scrub/rtrmap.c
+++ b/fs/xfs/scrub/rtrmap.c
@@ -3,7 +3,7 @@
* Copyright (c) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/rtrmap_repair.c b/fs/xfs/scrub/rtrmap_repair.c
index 7561941a337a..4610d6d80648 100644
--- a/fs/xfs/scrub/rtrmap_repair.c
+++ b/fs/xfs/scrub/rtrmap_repair.c
@@ -3,7 +3,7 @@
* Copyright (c) 2020-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -103,14 +103,11 @@ xrep_setup_rtrmapbt(
struct xfs_scrub *sc)
{
struct xrep_rtrmap *rr;
- char *descr;
int error;
xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);
- descr = xchk_xfile_rtgroup_descr(sc, "reverse mapping records");
- error = xrep_setup_xfbtree(sc, descr);
- kfree(descr);
+ error = xrep_setup_xfbtree(sc, "realtime reverse mapping records");
if (error)
return error;
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index 4ac679c1bd29..b510e6bbbd3e 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -43,7 +43,6 @@ xchk_setup_rtsummary(
struct xfs_scrub *sc)
{
struct xfs_mount *mp = sc->mp;
- char *descr;
struct xchk_rtsummary *rts;
int error;
@@ -70,10 +69,8 @@ xchk_setup_rtsummary(
* Create an xfile to construct a new rtsummary file. The xfile allows
* us to avoid pinning kernel memory for this purpose.
*/
- descr = xchk_xfile_descr(sc, "realtime summary file");
- error = xfile_create(descr, XFS_FSB_TO_B(mp, mp->m_rsumblocks),
- &sc->xfile);
- kfree(descr);
+ error = xfile_create("realtime summary file",
+ XFS_FSB_TO_B(mp, mp->m_rsumblocks), &sc->xfile);
if (error)
return error;
diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c
index d593977d70df..afffbd6e0ad1 100644
--- a/fs/xfs/scrub/rtsummary_repair.c
+++ b/fs/xfs/scrub/rtsummary_repair.c
@@ -3,7 +3,7 @@
* Copyright (c) 2020-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 3c3b0d25006f..c1c6415f5055 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -634,7 +634,7 @@ xchk_scrub_create_subord(
sub = kzalloc(sizeof(*sub), XCHK_GFP_FLAGS);
if (!sub)
- return ERR_PTR(-ENOMEM);
+ return NULL;
sub->old_smtype = sc->sm->sm_type;
sub->old_smflags = sc->sm->sm_flags;
diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c
index f8a37ea97791..4efafc5ae966 100644
--- a/fs/xfs/scrub/stats.c
+++ b/fs/xfs/scrub/stats.c
@@ -3,7 +3,7 @@
* Copyright (C) 2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c
index c848bcc07cd5..91d40b9fb5c6 100644
--- a/fs/xfs/scrub/symlink.c
+++ b/fs/xfs/scrub/symlink.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/symlink_repair.c b/fs/xfs/scrub/symlink_repair.c
index df629892462f..25416dfb5189 100644
--- a/fs/xfs/scrub/symlink_repair.c
+++ b/fs/xfs/scrub/symlink_repair.c
@@ -3,7 +3,7 @@
* Copyright (c) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
index cf99e0ca51b0..8d754df72aa5 100644
--- a/fs/xfs/scrub/tempfile.c
+++ b/fs/xfs/scrub/tempfile.c
@@ -3,7 +3,7 @@
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 987313a52e64..70d353287993 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/xfarray.c b/fs/xfs/scrub/xfarray.c
index ed2e8c64b1a8..c7c4a71b6fa7 100644
--- a/fs/xfs/scrub/xfarray.c
+++ b/fs/xfs/scrub/xfarray.c
@@ -3,7 +3,7 @@
* Copyright (C) 2021-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/xfblob.c b/fs/xfs/scrub/xfblob.c
index 6ef2a9637f16..96fc360312de 100644
--- a/fs/xfs/scrub/xfblob.c
+++ b/fs/xfs/scrub/xfblob.c
@@ -3,7 +3,7 @@
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
index c753c79df203..2998c9b62f4b 100644
--- a/fs/xfs/scrub/xfile.c
+++ b/fs/xfs/scrub/xfile.c
@@ -3,7 +3,7 @@
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
deleted file mode 100644
index 9355ccad9503..000000000000
--- a/fs/xfs/xfs.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- */
-#ifndef __XFS_H__
-#define __XFS_H__
-
-#ifdef CONFIG_XFS_DEBUG
-#define DEBUG 1
-#endif
-
-#ifdef CONFIG_XFS_DEBUG_EXPENSIVE
-#define DEBUG_EXPENSIVE 1
-#endif
-
-#ifdef CONFIG_XFS_ASSERT_FATAL
-#define XFS_ASSERT_FATAL 1
-#endif
-
-#ifdef CONFIG_XFS_WARN
-#define XFS_WARN 1
-#endif
-
-
-#include "xfs_linux.h"
-
-#endif /* __XFS_H__ */
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index c7c3dcfa2718..fdfca6fc75b6 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -3,7 +3,7 @@
* Copyright (c) 2008, Christoph Hellwig
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 56a544638491..043ab12a18ea 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -4,7 +4,7 @@
* Copyright (c) 2016-2025 Christoph Hellwig.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 319004bf089f..92331991f9fd 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -4,7 +4,7 @@
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index e8fa326ac995..354472bf45f1 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -4,7 +4,7 @@
* Author: Allison Henderson <allison.henderson@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
@@ -192,10 +192,9 @@ xfs_attri_item_size(
STATIC void
xfs_attri_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
struct xfs_attri_log_nameval *nv = attrip->attri_nameval;
attrip->attri_format.alfi_type = XFS_LI_ATTRI;
@@ -220,24 +219,23 @@ xfs_attri_item_format(
if (nv->new_value.iov_len > 0)
attrip->attri_format.alfi_size++;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRI_FORMAT,
- &attrip->attri_format,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_ATTRI_FORMAT, &attrip->attri_format,
sizeof(struct xfs_attri_log_format));
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NAME, nv->name.iov_base,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_NAME, nv->name.iov_base,
nv->name.iov_len);
if (nv->new_name.iov_len > 0)
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NEWNAME,
- nv->new_name.iov_base, nv->new_name.iov_len);
+ xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_NEWNAME,
+ nv->new_name.iov_base, nv->new_name.iov_len);
if (nv->value.iov_len > 0)
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_VALUE,
- nv->value.iov_base, nv->value.iov_len);
+ xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_VALUE,
+ nv->value.iov_base, nv->value.iov_len);
if (nv->new_value.iov_len > 0)
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NEWVALUE,
- nv->new_value.iov_base, nv->new_value.iov_len);
+ xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_NEWVALUE,
+ nv->new_value.iov_base, nv->new_value.iov_len);
}
/*
@@ -322,16 +320,15 @@ xfs_attrd_item_size(
*/
STATIC void
xfs_attrd_item_format(
- struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xfs_log_item *lip,
+ struct xlog_format_buf *lfb)
{
struct xfs_attrd_log_item *attrdp = ATTRD_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
attrdp->attrd_format.alfd_type = XFS_LI_ATTRD;
attrdp->attrd_format.alfd_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRD_FORMAT,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_ATTRD_FORMAT,
&attrdp->attrd_format,
sizeof(struct xfs_attrd_log_format));
}
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 379b48d015d2..114566b1ae5c 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -4,7 +4,7 @@
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
index 2a736d10eafb..b87e7975b613 100644
--- a/fs/xfs/xfs_bio_io.c
+++ b/fs/xfs/xfs_bio_io.c
@@ -2,7 +2,7 @@
/*
* Copyright (c) 2019 Christoph Hellwig.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
static inline unsigned int bio_max_vecs(unsigned int count)
{
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 80f0c4bcc483..e8775f254c89 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -3,7 +3,7 @@
* Copyright (C) 2016 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
@@ -92,10 +92,9 @@ unsigned int xfs_bui_log_space(unsigned int nr)
STATIC void
xfs_bui_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_bui_log_item *buip = BUI_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
ASSERT(atomic_read(&buip->bui_next_extent) ==
buip->bui_format.bui_nextents);
@@ -103,7 +102,7 @@ xfs_bui_item_format(
buip->bui_format.bui_type = XFS_LI_BUI;
buip->bui_format.bui_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_BUI_FORMAT, &buip->bui_format,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_BUI_FORMAT, &buip->bui_format,
xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents));
}
@@ -188,15 +187,14 @@ unsigned int xfs_bud_log_space(void)
STATIC void
xfs_bud_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_bud_log_item *budp = BUD_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
budp->bud_format.bud_type = XFS_LI_BUD;
budp->bud_format.bud_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_BUD_FORMAT, &budp->bud_format,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_BUD_FORMAT, &budp->bud_format,
sizeof(struct xfs_bud_log_format));
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 2208a720ec3f..0ab00615f1ad 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -4,7 +4,7 @@
* Copyright (c) 2012 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 47edf3041631..db46883991de 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include <linux/backing-dev.h>
#include <linux/dax.h>
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index f4c5be67826e..8487635579e5 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -263,24 +263,21 @@ xfs_buf_item_size(
static inline void
xfs_buf_item_copy_iovec(
- struct xfs_log_vec *lv,
- struct xfs_log_iovec **vecp,
+ struct xlog_format_buf *lfb,
struct xfs_buf *bp,
uint offset,
int first_bit,
uint nbits)
{
offset += first_bit * XFS_BLF_CHUNK;
- xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BCHUNK,
- xfs_buf_offset(bp, offset),
+ xlog_format_copy(lfb, XLOG_REG_TYPE_BCHUNK, xfs_buf_offset(bp, offset),
nbits * XFS_BLF_CHUNK);
}
static void
xfs_buf_item_format_segment(
struct xfs_buf_log_item *bip,
- struct xfs_log_vec *lv,
- struct xfs_log_iovec **vecp,
+ struct xlog_format_buf *lfb,
uint offset,
struct xfs_buf_log_format *blfp)
{
@@ -308,7 +305,7 @@ xfs_buf_item_format_segment(
return;
}
- blfp = xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size);
+ blfp = xlog_format_copy(lfb, XLOG_REG_TYPE_BFORMAT, blfp, base_size);
blfp->blf_size = 1;
if (bip->bli_flags & XFS_BLI_STALE) {
@@ -331,8 +328,7 @@ xfs_buf_item_format_segment(
nbits = xfs_contig_bits(blfp->blf_data_map,
blfp->blf_map_size, first_bit);
ASSERT(nbits > 0);
- xfs_buf_item_copy_iovec(lv, vecp, bp, offset,
- first_bit, nbits);
+ xfs_buf_item_copy_iovec(lfb, bp, offset, first_bit, nbits);
blfp->blf_size++;
/*
@@ -357,11 +353,10 @@ xfs_buf_item_format_segment(
STATIC void
xfs_buf_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
- struct xfs_log_iovec *vecp = NULL;
uint offset = 0;
int i;
@@ -398,7 +393,7 @@ xfs_buf_item_format(
}
for (i = 0; i < bip->bli_format_count; i++) {
- xfs_buf_item_format_segment(bip, lv, &vecp, offset,
+ xfs_buf_item_format_segment(bip, lfb, offset,
&bip->bli_formats[i]);
offset += BBTOB(bp->b_maps[i].bm_len);
}
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index e4c8af873632..77ad071ebe78 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
index dcbfa274e06d..0106da0a9f44 100644
--- a/fs/xfs/xfs_buf_mem.c
+++ b/fs/xfs/xfs_buf_mem.c
@@ -3,7 +3,7 @@
* Copyright (c) 2023-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_buf.h"
#include "xfs_buf_mem.h"
diff --git a/fs/xfs/xfs_dahash_test.c b/fs/xfs/xfs_dahash_test.c
index 0dab5941e080..f1ee2643b948 100644
--- a/fs/xfs/xfs_dahash_test.c
+++ b/fs/xfs/xfs_dahash_test.c
@@ -3,7 +3,7 @@
* Copyright (C) 2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 06ac5a7de60a..60a80d4173f7 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -4,7 +4,7 @@
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index b6ffe4807a11..31477a74b523 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -3,7 +3,7 @@
* Copyright (C) 2010, 2023 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 612ca682a513..2b208e2c5264 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2003 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index b374cd9f1900..491e2a7053a3 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2003 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -44,25 +44,24 @@ xfs_qm_dquot_logitem_size(
STATIC void
xfs_qm_dquot_logitem_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_disk_dquot ddq;
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
struct xfs_dq_logformat *qlf;
- qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QFORMAT);
+ qlf = xlog_format_start(lfb, XLOG_REG_TYPE_QFORMAT);
qlf->qlf_type = XFS_LI_DQUOT;
qlf->qlf_size = 2;
qlf->qlf_id = qlip->qli_dquot->q_id;
qlf->qlf_blkno = qlip->qli_dquot->q_blkno;
qlf->qlf_len = 1;
qlf->qlf_boffset = qlip->qli_dquot->q_bufoffset;
- xlog_finish_iovec(lv, vecp, sizeof(struct xfs_dq_logformat));
+ xlog_format_commit(lfb, sizeof(struct xfs_dq_logformat));
xfs_dquot_to_disk(&ddq, qlip->qli_dquot);
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_DQUOT, &ddq,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_DQUOT, &ddq,
sizeof(struct xfs_disk_dquot));
}
diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
index 89bc9bcaf51e..fe419b28de22 100644
--- a/fs/xfs/xfs_dquot_item_recover.c
+++ b/fs/xfs/xfs_dquot_item_recover.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_drain.c b/fs/xfs/xfs_drain.c
index fa5f31931efd..1ad67f6c1fbf 100644
--- a/fs/xfs/xfs_drain.c
+++ b/fs/xfs/xfs_drain.c
@@ -3,7 +3,7 @@
* Copyright (C) 2022-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 39830b252ac8..d652240a1dca 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_fs.h"
@@ -22,6 +22,12 @@
static const unsigned int xfs_errortag_random_default[] = { XFS_ERRTAGS };
#undef XFS_ERRTAG
+#define XFS_ERRTAG(_tag, _name, _default) \
+ [XFS_ERRTAG_##_tag] = __stringify(_name),
+#include "xfs_errortag.h"
+static const char *xfs_errortag_names[] = { XFS_ERRTAGS };
+#undef XFS_ERRTAG
+
struct xfs_errortag_attr {
struct attribute attr;
unsigned int tag;
@@ -50,17 +56,18 @@ xfs_errortag_attr_store(
{
struct xfs_mount *mp = to_mp(kobject);
unsigned int error_tag = to_attr(attr)->tag;
+ unsigned int val;
int ret;
if (strcmp(buf, "default") == 0) {
- mp->m_errortag[error_tag] =
- xfs_errortag_random_default[error_tag];
+ val = xfs_errortag_random_default[error_tag];
} else {
- ret = kstrtouint(buf, 0, &mp->m_errortag[error_tag]);
+ ret = kstrtouint(buf, 0, &val);
if (ret)
return ret;
}
+ WRITE_ONCE(mp->m_errortag[error_tag], val);
return count;
}
@@ -71,9 +78,9 @@ xfs_errortag_attr_show(
char *buf)
{
struct xfs_mount *mp = to_mp(kobject);
- unsigned int error_tag = to_attr(attr)->tag;
- return snprintf(buf, PAGE_SIZE, "%u\n", mp->m_errortag[error_tag]);
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ READ_ONCE(mp->m_errortag[to_attr(attr)->tag]));
}
static const struct sysfs_ops xfs_errortag_sysfs_ops = {
@@ -114,18 +121,8 @@ int
xfs_errortag_init(
struct xfs_mount *mp)
{
- int ret;
-
- mp->m_errortag = kzalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX,
- GFP_KERNEL | __GFP_RETRY_MAYFAIL);
- if (!mp->m_errortag)
- return -ENOMEM;
-
- ret = xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
+ return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
&mp->m_kobj, "errortag");
- if (ret)
- kfree(mp->m_errortag);
- return ret;
}
void
@@ -133,33 +130,6 @@ xfs_errortag_del(
struct xfs_mount *mp)
{
xfs_sysfs_del(&mp->m_errortag_kobj);
- kfree(mp->m_errortag);
-}
-
-static bool
-xfs_errortag_valid(
- unsigned int error_tag)
-{
- if (error_tag >= XFS_ERRTAG_MAX)
- return false;
-
- /* Error out removed injection types */
- if (error_tag == XFS_ERRTAG_DROP_WRITES)
- return false;
- return true;
-}
-
-bool
-xfs_errortag_enabled(
- struct xfs_mount *mp,
- unsigned int tag)
-{
- if (!mp->m_errortag)
- return false;
- if (!xfs_errortag_valid(tag))
- return false;
-
- return mp->m_errortag[tag] != 0;
}
bool
@@ -171,21 +141,7 @@ xfs_errortag_test(
{
unsigned int randfactor;
- /*
- * To be able to use error injection anywhere, we need to ensure error
- * injection mechanism is already initialized.
- *
- * Code paths like I/O completion can be called before the
- * initialization is complete, but be able to inject errors in such
- * places is still useful.
- */
- if (!mp->m_errortag)
- return false;
-
- if (!xfs_errortag_valid(error_tag))
- return false;
-
- randfactor = mp->m_errortag[error_tag];
+ randfactor = READ_ONCE(mp->m_errortag[error_tag]);
if (!randfactor || get_random_u32_below(randfactor))
return false;
@@ -195,6 +151,27 @@ xfs_errortag_test(
return true;
}
+/*
+ * Stall the caller for as many milliseconds as are configured for @error_tag
+ * on this mount; a setting of zero means no delay is injected.  @file and
+ * @line identify the injection site in the warning message.
+ */
+void
+xfs_errortag_delay(
+	struct xfs_mount	*mp,
+	const char		*file,
+	int			line,
+	unsigned int		error_tag)
+{
+	unsigned int		delay_ms = READ_ONCE(mp->m_errortag[error_tag]);
+
+	might_sleep();
+
+	if (delay_ms) {
+		xfs_warn_ratelimited(mp,
+"Injecting %ums delay at file %s, line %d, on filesystem \"%s\"",
+			delay_ms, file, line, mp->m_super->s_id);
+		mdelay(delay_ms);
+	}
+}
+
int
xfs_errortag_add(
struct xfs_mount *mp,
@@ -202,17 +179,60 @@ xfs_errortag_add(
{
BUILD_BUG_ON(ARRAY_SIZE(xfs_errortag_random_default) != XFS_ERRTAG_MAX);
- if (!xfs_errortag_valid(error_tag))
+ if (error_tag >= XFS_ERRTAG_MAX)
+ return -EINVAL;
+
+ /* Error out removed injection types */
+ switch (error_tag) {
+ case XFS_ERRTAG_DROP_WRITES:
return -EINVAL;
- mp->m_errortag[error_tag] = xfs_errortag_random_default[error_tag];
+ default:
+ break;
+ }
+
+ WRITE_ONCE(mp->m_errortag[error_tag],
+ xfs_errortag_random_default[error_tag]);
return 0;
}
+/*
+ * Enable the error tag whose name matches @tag_name, using its default
+ * setting.  Returns -EINVAL if no tag has that name, otherwise whatever
+ * xfs_errortag_add() returns.
+ */
int
+xfs_errortag_add_name(
+	struct xfs_mount	*mp,
+	const char		*tag_name)
+{
+	unsigned int		tag;
+
+	for (tag = 0; tag < XFS_ERRTAG_MAX; tag++) {
+		const char	*name = xfs_errortag_names[tag];
+
+		if (name && strcmp(name, tag_name) == 0)
+			return xfs_errortag_add(mp, tag);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Carry every nonzero error tag setting from @src_mp over to @dst_mp.  Tags
+ * that are zero on the source leave the destination's value untouched.
+ */
+void
+xfs_errortag_copy(
+	struct xfs_mount	*dst_mp,
+	struct xfs_mount	*src_mp)
+{
+	unsigned int		tag;
+
+	for (tag = 0; tag < XFS_ERRTAG_MAX; tag++) {
+		unsigned int	setting = READ_ONCE(src_mp->m_errortag[tag]);
+
+		if (!setting)
+			continue;
+		WRITE_ONCE(dst_mp->m_errortag[tag], setting);
+	}
+}
+
+int
xfs_errortag_clearall(
struct xfs_mount *mp)
{
- memset(mp->m_errortag, 0, sizeof(unsigned int) * XFS_ERRTAG_MAX);
+ unsigned int i;
+
+ for (i = 0; i < XFS_ERRTAG_MAX; i++)
+ WRITE_ONCE(mp->m_errortag[i], 0);
return 0;
}
#endif /* DEBUG */
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index fe6a71bbe9cd..05fc1d1cf521 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -40,28 +40,23 @@ bool xfs_errortag_test(struct xfs_mount *mp, const char *file, int line,
unsigned int error_tag);
#define XFS_TEST_ERROR(mp, tag) \
xfs_errortag_test((mp), __FILE__, __LINE__, (tag))
-bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag);
+void xfs_errortag_delay(struct xfs_mount *mp, const char *file, int line,
+ unsigned int error_tag);
#define XFS_ERRORTAG_DELAY(mp, tag) \
- do { \
- might_sleep(); \
- if (!xfs_errortag_enabled((mp), (tag))) \
- break; \
- xfs_warn_ratelimited((mp), \
-"Injecting %ums delay at file %s, line %d, on filesystem \"%s\"", \
- (mp)->m_errortag[(tag)], __FILE__, __LINE__, \
- (mp)->m_super->s_id); \
- mdelay((mp)->m_errortag[(tag)]); \
- } while (0)
-
+ xfs_errortag_delay((mp), __FILE__, __LINE__, (tag))
int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag);
+int xfs_errortag_add_name(struct xfs_mount *mp, const char *tag_name);
+void xfs_errortag_copy(struct xfs_mount *dst_mp, struct xfs_mount *src_mp);
int xfs_errortag_clearall(struct xfs_mount *mp);
#else
#define xfs_errortag_init(mp) (0)
#define xfs_errortag_del(mp)
#define XFS_TEST_ERROR(mp, tag) (false)
#define XFS_ERRORTAG_DELAY(mp, tag) ((void)0)
-#define xfs_errortag_add(mp, tag) (ENOSYS)
-#define xfs_errortag_clearall(mp) (ENOSYS)
+#define xfs_errortag_add(mp, tag) (-ENOSYS)
+#define xfs_errortag_copy(dst_mp, src_mp) ((void)0)
+#define xfs_errortag_add_name(mp, tag_name) (-ENOSYS)
+#define xfs_errortag_clearall(mp) (-ENOSYS)
#endif /* DEBUG */
/*
diff --git a/fs/xfs/xfs_exchmaps_item.c b/fs/xfs/xfs_exchmaps_item.c
index 229cbe0adf17..13a42467370f 100644
--- a/fs/xfs/xfs_exchmaps_item.c
+++ b/fs/xfs/xfs_exchmaps_item.c
@@ -3,7 +3,7 @@
* Copyright (c) 2020-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
@@ -83,16 +83,14 @@ xfs_xmi_item_size(
STATIC void
xfs_xmi_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
xmi_lip->xmi_format.xmi_type = XFS_LI_XMI;
xmi_lip->xmi_format.xmi_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMI_FORMAT,
- &xmi_lip->xmi_format,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_XMI_FORMAT, &xmi_lip->xmi_format,
sizeof(struct xfs_xmi_log_format));
}
@@ -166,15 +164,14 @@ xfs_xmd_item_size(
STATIC void
xfs_xmd_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_xmd_log_item *xmd_lip = XMD_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
xmd_lip->xmd_format.xmd_type = XFS_LI_XMD;
xmd_lip->xmd_format.xmd_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMD_FORMAT, &xmd_lip->xmd_format,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_XMD_FORMAT, &xmd_lip->xmd_format,
sizeof(struct xfs_xmd_log_format));
}
diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c
index 0b41bdfecdfb..5c083f29ea65 100644
--- a/fs/xfs/xfs_exchrange.c
+++ b/fs/xfs/xfs_exchrange.c
@@ -3,7 +3,7 @@
* Copyright (c) 2020-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 201489d3de08..e3e3c3c89840 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index da3161572735..cfecb2959472 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -5,7 +5,7 @@
* Copyright (c) 2011 Christoph Hellwig.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 418ddab590e0..749a4eb9793c 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
@@ -98,10 +98,9 @@ unsigned int xfs_efi_log_space(unsigned int nr)
STATIC void
xfs_efi_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
ASSERT(atomic_read(&efip->efi_next_extent) ==
efip->efi_format.efi_nextents);
@@ -110,7 +109,7 @@ xfs_efi_item_format(
efip->efi_format.efi_type = lip->li_type;
efip->efi_format.efi_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format,
xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents));
}
@@ -277,10 +276,9 @@ unsigned int xfs_efd_log_space(unsigned int nr)
STATIC void
xfs_efd_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents);
ASSERT(lip->li_type == XFS_LI_EFD || lip->li_type == XFS_LI_EFD_RT);
@@ -288,7 +286,7 @@ xfs_efd_item_format(
efdp->efd_format.efd_type = lip->li_type;
efdp->efd_format.efd_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format,
xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents));
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ecd7bf42446b..04176aae6997 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 044918fbae06..44e1b14069a3 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -4,7 +4,7 @@
* Copyright (c) 2014 Christoph Hellwig.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index af68c7de8ee8..098c2b50bc6f 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -3,7 +3,7 @@
* Copyright (C) 2017 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index b7c21f68edc7..17255c41786b 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -25,6 +25,7 @@
#include "xfs_rtrmap_btree.h"
#include "xfs_rtrefcount_btree.h"
#include "xfs_metafile.h"
+#include "xfs_healthmon.h"
#include <linux/fserror.h>
@@ -544,6 +545,7 @@ xfs_do_force_shutdown(
xfs_stack_trace();
fserror_report_shutdown(mp->m_super, GFP_KERNEL);
+ xfs_healthmon_report_shutdown(mp, flags);
}
/*
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index 566fd663c95b..60efe8246304 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_error.h"
/*
diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c
index 5a3e3bf4e7cc..d1291ca15239 100644
--- a/fs/xfs/xfs_handle.c
+++ b/fs/xfs/xfs_handle.c
@@ -4,7 +4,7 @@
* Copyright (c) 2022-2024 Oracle.
* All rights reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index fbb8886c72fe..169123772cb3 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -3,7 +3,7 @@
* Copyright (C) 2019 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -19,6 +19,7 @@
#include "xfs_da_btree.h"
#include "xfs_quota_defs.h"
#include "xfs_rtgroup.h"
+#include "xfs_healthmon.h"
#include <linux/fserror.h>
@@ -107,14 +108,19 @@ xfs_fs_mark_sick(
struct xfs_mount *mp,
unsigned int mask)
{
+ unsigned int old_mask;
+
ASSERT(!(mask & ~XFS_SICK_FS_ALL));
trace_xfs_fs_mark_sick(mp, mask);
spin_lock(&mp->m_sb_lock);
+ old_mask = mp->m_fs_sick;
mp->m_fs_sick |= mask;
spin_unlock(&mp->m_sb_lock);
fserror_report_metadata(mp->m_super, -EFSCORRUPTED, GFP_NOFS);
+ if (mask)
+ xfs_healthmon_report_fs(mp, XFS_HEALTHMON_SICK, old_mask, mask);
}
/* Mark per-fs metadata as having been checked and found unhealthy by fsck. */
@@ -123,15 +129,21 @@ xfs_fs_mark_corrupt(
struct xfs_mount *mp,
unsigned int mask)
{
+ unsigned int old_mask;
+
ASSERT(!(mask & ~XFS_SICK_FS_ALL));
trace_xfs_fs_mark_corrupt(mp, mask);
spin_lock(&mp->m_sb_lock);
+ old_mask = mp->m_fs_sick;
mp->m_fs_sick |= mask;
mp->m_fs_checked |= mask;
spin_unlock(&mp->m_sb_lock);
fserror_report_metadata(mp->m_super, -EFSCORRUPTED, GFP_NOFS);
+ if (mask)
+ xfs_healthmon_report_fs(mp, XFS_HEALTHMON_CORRUPT, old_mask,
+ mask);
}
/* Mark a per-fs metadata healed. */
@@ -140,15 +152,22 @@ xfs_fs_mark_healthy(
struct xfs_mount *mp,
unsigned int mask)
{
+ unsigned int old_mask;
+
ASSERT(!(mask & ~XFS_SICK_FS_ALL));
trace_xfs_fs_mark_healthy(mp, mask);
spin_lock(&mp->m_sb_lock);
+ old_mask = mp->m_fs_sick;
mp->m_fs_sick &= ~mask;
if (!(mp->m_fs_sick & XFS_SICK_FS_PRIMARY))
mp->m_fs_sick &= ~XFS_SICK_FS_SECONDARY;
mp->m_fs_checked |= mask;
spin_unlock(&mp->m_sb_lock);
+
+ if (mask)
+ xfs_healthmon_report_fs(mp, XFS_HEALTHMON_HEALTHY, old_mask,
+ mask);
}
/* Sample which per-fs metadata are unhealthy. */
@@ -198,14 +217,20 @@ xfs_group_mark_sick(
struct xfs_group *xg,
unsigned int mask)
{
+ unsigned int old_mask;
+
xfs_group_check_mask(xg, mask);
trace_xfs_group_mark_sick(xg, mask);
spin_lock(&xg->xg_state_lock);
+ old_mask = xg->xg_sick;
xg->xg_sick |= mask;
spin_unlock(&xg->xg_state_lock);
fserror_report_metadata(xg->xg_mount->m_super, -EFSCORRUPTED, GFP_NOFS);
+ if (mask)
+ xfs_healthmon_report_group(xg, XFS_HEALTHMON_SICK, old_mask,
+ mask);
}
/*
@@ -216,15 +241,21 @@ xfs_group_mark_corrupt(
struct xfs_group *xg,
unsigned int mask)
{
+ unsigned int old_mask;
+
xfs_group_check_mask(xg, mask);
trace_xfs_group_mark_corrupt(xg, mask);
spin_lock(&xg->xg_state_lock);
+ old_mask = xg->xg_sick;
xg->xg_sick |= mask;
xg->xg_checked |= mask;
spin_unlock(&xg->xg_state_lock);
fserror_report_metadata(xg->xg_mount->m_super, -EFSCORRUPTED, GFP_NOFS);
+ if (mask)
+ xfs_healthmon_report_group(xg, XFS_HEALTHMON_CORRUPT, old_mask,
+ mask);
}
/*
@@ -235,15 +266,22 @@ xfs_group_mark_healthy(
struct xfs_group *xg,
unsigned int mask)
{
+ unsigned int old_mask;
+
xfs_group_check_mask(xg, mask);
trace_xfs_group_mark_healthy(xg, mask);
spin_lock(&xg->xg_state_lock);
+ old_mask = xg->xg_sick;
xg->xg_sick &= ~mask;
if (!(xg->xg_sick & XFS_SICK_AG_PRIMARY))
xg->xg_sick &= ~XFS_SICK_AG_SECONDARY;
xg->xg_checked |= mask;
spin_unlock(&xg->xg_state_lock);
+
+ if (mask)
+ xfs_healthmon_report_group(xg, XFS_HEALTHMON_HEALTHY, old_mask,
+ mask);
}
/* Sample which per-ag metadata are unhealthy. */
@@ -282,10 +320,13 @@ xfs_inode_mark_sick(
struct xfs_inode *ip,
unsigned int mask)
{
+ unsigned int old_mask;
+
ASSERT(!(mask & ~XFS_SICK_INO_ALL));
trace_xfs_inode_mark_sick(ip, mask);
spin_lock(&ip->i_flags_lock);
+ old_mask = ip->i_sick;
ip->i_sick |= mask;
spin_unlock(&ip->i_flags_lock);
@@ -299,6 +340,9 @@ xfs_inode_mark_sick(
spin_unlock(&VFS_I(ip)->i_lock);
fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS);
+ if (mask)
+ xfs_healthmon_report_inode(ip, XFS_HEALTHMON_SICK, old_mask,
+ mask);
}
/* Mark inode metadata as having been checked and found unhealthy by fsck. */
@@ -307,10 +351,13 @@ xfs_inode_mark_corrupt(
struct xfs_inode *ip,
unsigned int mask)
{
+ unsigned int old_mask;
+
ASSERT(!(mask & ~XFS_SICK_INO_ALL));
trace_xfs_inode_mark_corrupt(ip, mask);
spin_lock(&ip->i_flags_lock);
+ old_mask = ip->i_sick;
ip->i_sick |= mask;
ip->i_checked |= mask;
spin_unlock(&ip->i_flags_lock);
@@ -325,6 +372,9 @@ xfs_inode_mark_corrupt(
spin_unlock(&VFS_I(ip)->i_lock);
fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS);
+ if (mask)
+ xfs_healthmon_report_inode(ip, XFS_HEALTHMON_CORRUPT, old_mask,
+ mask);
}
/* Mark parts of an inode healed. */
@@ -333,15 +383,22 @@ xfs_inode_mark_healthy(
struct xfs_inode *ip,
unsigned int mask)
{
+ unsigned int old_mask;
+
ASSERT(!(mask & ~XFS_SICK_INO_ALL));
trace_xfs_inode_mark_healthy(ip, mask);
spin_lock(&ip->i_flags_lock);
+ old_mask = ip->i_sick;
ip->i_sick &= ~mask;
if (!(ip->i_sick & XFS_SICK_INO_PRIMARY))
ip->i_sick &= ~XFS_SICK_INO_SECONDARY;
ip->i_checked |= mask;
spin_unlock(&ip->i_flags_lock);
+
+ if (mask)
+ xfs_healthmon_report_inode(ip, XFS_HEALTHMON_HEALTHY, old_mask,
+ mask);
}
/* Sample which parts of an inode are unhealthy. */
@@ -421,6 +478,25 @@ xfs_fsop_geom_health(
}
}
+/*
+ * Convert a mask of XFS_SICK_FS_* bits into the matching XFS_FSOP_GEOM_SICK_*
+ * bits.  Only bits listed in fs_map are translated; the rt free space codes
+ * are sent via the rtgroup events instead.
+ */
+unsigned int
+xfs_healthmon_fs_mask(
+	unsigned int			sick_mask)
+{
+	const struct ioctl_sick_map	*map;
+	unsigned int			ret = 0;
+
+	for_each_sick_map(fs_map, map) {
+		if (!(sick_mask & map->sick_mask))
+			continue;
+		ret |= map->ioctl_mask;
+	}
+
+	return ret;
+}
+
static const struct ioctl_sick_map ag_map[] = {
{ XFS_SICK_AG_SB, XFS_AG_GEOM_SICK_SB },
{ XFS_SICK_AG_AGF, XFS_AG_GEOM_SICK_AGF },
@@ -457,6 +533,22 @@ xfs_ag_geom_health(
}
}
+/*
+ * Convert a mask of XFS_SICK_AG_* bits into the matching XFS_AG_GEOM_SICK_*
+ * bits.  Only bits listed in ag_map are translated.
+ */
+unsigned int
+xfs_healthmon_perag_mask(
+	unsigned int			sick_mask)
+{
+	const struct ioctl_sick_map	*map;
+	unsigned int			ret = 0;
+
+	for_each_sick_map(ag_map, map) {
+		if (!(sick_mask & map->sick_mask))
+			continue;
+		ret |= map->ioctl_mask;
+	}
+
+	return ret;
+}
+
static const struct ioctl_sick_map rtgroup_map[] = {
{ XFS_SICK_RG_SUPER, XFS_RTGROUP_GEOM_SICK_SUPER },
{ XFS_SICK_RG_BITMAP, XFS_RTGROUP_GEOM_SICK_BITMAP },
@@ -487,6 +579,22 @@ xfs_rtgroup_geom_health(
}
}
+/*
+ * Convert a mask of XFS_SICK_RG_* bits into the matching
+ * XFS_RTGROUP_GEOM_SICK_* bits.  Only bits listed in rtgroup_map are
+ * translated.
+ */
+unsigned int
+xfs_healthmon_rtgroup_mask(
+	unsigned int			sick_mask)
+{
+	const struct ioctl_sick_map	*map;
+	unsigned int			ret = 0;
+
+	for_each_sick_map(rtgroup_map, map) {
+		if (!(sick_mask & map->sick_mask))
+			continue;
+		ret |= map->ioctl_mask;
+	}
+
+	return ret;
+}
+
static const struct ioctl_sick_map ino_map[] = {
{ XFS_SICK_INO_CORE, XFS_BS_SICK_INODE },
{ XFS_SICK_INO_BMBTD, XFS_BS_SICK_BMBTD },
@@ -525,6 +633,22 @@ xfs_bulkstat_health(
}
}
+/*
+ * Convert a mask of XFS_SICK_INO_* bits into the matching XFS_BS_SICK_* bits.
+ * Only bits listed in ino_map are translated.
+ */
+unsigned int
+xfs_healthmon_inode_mask(
+	unsigned int			sick_mask)
+{
+	const struct ioctl_sick_map	*map;
+	unsigned int			ret = 0;
+
+	for_each_sick_map(ino_map, map) {
+		if (!(sick_mask & map->sick_mask))
+			continue;
+		ret |= map->ioctl_mask;
+	}
+
+	return ret;
+}
+
/* Mark a block mapping sick. */
void
xfs_bmap_mark_sick(
diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c
new file mode 100644
index 000000000000..ca7352dcd182
--- /dev/null
+++ b/fs/xfs/xfs_healthmon.c
@@ -0,0 +1,1255 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2024-2026 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs_platform.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trace.h"
+#include "xfs_ag.h"
+#include "xfs_btree.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_quota_defs.h"
+#include "xfs_rtgroup.h"
+#include "xfs_health.h"
+#include "xfs_healthmon.h"
+#include "xfs_fsops.h"
+#include "xfs_notify_failure.h"
+#include "xfs_file.h"
+#include "xfs_ioctl.h"
+
+#include <linux/anon_inodes.h>
+#include <linux/eventpoll.h>
+#include <linux/poll.h>
+#include <linux/fserror.h>
+
+/*
+ * Live Health Monitoring
+ * ======================
+ *
+ * Autonomous self-healing of XFS filesystems requires a means for the kernel
+ * to send filesystem health events to a monitoring daemon in userspace. To
+ * accomplish this, we establish a thread_with_file kthread object to handle
+ * translating internal events about filesystem health into a format that can
+ * be parsed easily by userspace. When those internal events occur, the core
+ * filesystem code calls this health monitor to convey the events to userspace.
+ * Userspace reads events from the file descriptor returned by the ioctl.
+ *
+ * The healthmon abstraction has a weak reference to the host filesystem mount
+ * so that the queueing and processing of the events do not pin the mount and
+ * cannot slow down the main filesystem. The healthmon object can exist past
+ * the end of the filesystem mount.
+ */
+
+/* mount_cookie value of a health monitor that has detached from its mount */
+#define DETACHED_MOUNT_COOKIE ((uintptr_t)0)
+
+/* Constrain the number of event objects that can build up in memory. */
+#define XFS_HEALTHMON_MAX_EVENTS (SZ_32K / \
+ sizeof(struct xfs_healthmon_event))
+
+/* Constrain the size of the output buffer for read_iter. */
+#define XFS_HEALTHMON_MAX_OUTBUF SZ_64K
+
+/* spinlock for atomically updating xfs_mount <-> xfs_healthmon pointers */
+static DEFINE_SPINLOCK(xfs_healthmon_lock);
+
+/*
+ * Grab a reference to the healthmon object for a given mount, if any.
+ *
+ * Returns the monitor with its refcount elevated, or NULL if the mount has no
+ * monitor or the monitor's refcount has already dropped to zero.  The caller
+ * releases a non-NULL result with xfs_healthmon_put().
+ */
+static struct xfs_healthmon *
+xfs_healthmon_get(
+ struct xfs_mount *mp)
+{
+ struct xfs_healthmon *hm;
+
+ rcu_read_lock();
+ hm = mp->m_healthmon;
+ if (hm && !refcount_inc_not_zero(&hm->ref))
+ hm = NULL;
+ rcu_read_unlock();
+
+ return hm;
+}
+
+/*
+ * Release the reference to a healthmon object.  If there are no more holders,
+ * free the health monitor after an RCU grace period to eliminate possibility
+ * of races with xfs_healthmon_get.
+ *
+ * The final put also frees every event still linked on the queue, as well as
+ * hm->unmount_event and hm->buffer, and destroys hm->lock.
+ */
+static void
+xfs_healthmon_put(
+ struct xfs_healthmon *hm)
+{
+ if (refcount_dec_and_test(&hm->ref)) {
+ struct xfs_healthmon_event *event;
+ struct xfs_healthmon_event *next = hm->first_event;
+
+ while ((event = next) != NULL) {
+ trace_xfs_healthmon_drop(hm, event);
+ next = event->next;
+ kfree(event);
+ }
+
+ kfree(hm->unmount_event);
+ kfree(hm->buffer);
+ mutex_destroy(&hm->lock);
+ kfree_rcu_mightsleep(hm);
+ }
+}
+
+/*
+ * Attach a health monitor to an xfs_mount.  Only one allowed at a time.
+ *
+ * Returns -EEXIST if the mount already has a monitor.  Otherwise, while
+ * holding xfs_healthmon_lock, takes an extra reference on @hm, publishes it
+ * in mp->m_healthmon, records the superblock pointer as the mount cookie, and
+ * returns 0.
+ */
+STATIC int
+xfs_healthmon_attach(
+ struct xfs_mount *mp,
+ struct xfs_healthmon *hm)
+{
+ spin_lock(&xfs_healthmon_lock);
+ if (mp->m_healthmon != NULL) {
+ spin_unlock(&xfs_healthmon_lock);
+ return -EEXIST;
+ }
+
+ refcount_inc(&hm->ref);
+ mp->m_healthmon = hm;
+ hm->mount_cookie = (uintptr_t)mp->m_super;
+ spin_unlock(&xfs_healthmon_lock);
+
+ return 0;
+}
+
+/*
+ * Detach a xfs mount from a specific healthmon instance.
+ *
+ * Does nothing if the monitor is already detached.  Otherwise, while holding
+ * xfs_healthmon_lock, clears the mount's m_healthmon pointer and resets the
+ * mount cookie, then drops one reference on the monitor (presumably the one
+ * taken by xfs_healthmon_attach).
+ */
+STATIC void
+xfs_healthmon_detach(
+ struct xfs_healthmon *hm)
+{
+ spin_lock(&xfs_healthmon_lock);
+ if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) {
+ spin_unlock(&xfs_healthmon_lock);
+ return;
+ }
+
+ XFS_M((struct super_block *)hm->mount_cookie)->m_healthmon = NULL;
+ hm->mount_cookie = DETACHED_MOUNT_COOKIE;
+ spin_unlock(&xfs_healthmon_lock);
+
+ trace_xfs_healthmon_detach(hm);
+ xfs_healthmon_put(hm);
+}
+
+/* Count one more queued event, both currently pending and over all time. */
+static inline void xfs_healthmon_bump_events(struct xfs_healthmon *hm)
+{
+	hm->total_events++;
+	hm->events++;
+}
+
+/* Count one more lost event, both since the last report and over all time. */
+static inline void xfs_healthmon_bump_lost(struct xfs_healthmon *hm)
+{
+	hm->total_lost++;
+	hm->lost_prev_event++;
+}
+
+/*
+ * If possible, merge a new event into an existing event.  Returns whether or
+ * not it merged anything.
+ *
+ * Events merge only when type and domain match.  Lost-event counts are
+ * summed; health masks are ORed together for the same fs, group, or inode;
+ * shutdown flags are ORed; media errors and file I/O errors merge only when
+ * the two ranges are directly adjacent.  A NULL @existing never merges.
+ */
+static bool
+xfs_healthmon_merge_events(
+ struct xfs_healthmon_event *existing,
+ const struct xfs_healthmon_event *new)
+{
+ if (!existing)
+ return false;
+
+ /* type and domain must match to merge events */
+ if (existing->type != new->type ||
+ existing->domain != new->domain)
+ return false;
+
+ switch (existing->type) {
+ case XFS_HEALTHMON_RUNNING:
+ case XFS_HEALTHMON_UNMOUNT:
+ /* should only ever be one of these events anyway */
+ return false;
+
+ case XFS_HEALTHMON_LOST:
+ existing->lostcount += new->lostcount;
+ return true;
+
+ case XFS_HEALTHMON_SICK:
+ case XFS_HEALTHMON_CORRUPT:
+ case XFS_HEALTHMON_HEALTHY:
+ switch (existing->domain) {
+ case XFS_HEALTHMON_FS:
+ existing->fsmask |= new->fsmask;
+ return true;
+ case XFS_HEALTHMON_AG:
+ case XFS_HEALTHMON_RTGROUP:
+ if (existing->group == new->group){
+ existing->grpmask |= new->grpmask;
+ return true;
+ }
+ return false;
+ case XFS_HEALTHMON_INODE:
+ if (existing->ino == new->ino &&
+ existing->gen == new->gen) {
+ existing->imask |= new->imask;
+ return true;
+ }
+ return false;
+ default:
+ ASSERT(0);
+ return false;
+ }
+ return false;
+
+ case XFS_HEALTHMON_SHUTDOWN:
+ /* yes, we can race to shutdown */
+ existing->flags |= new->flags;
+ return true;
+
+ case XFS_HEALTHMON_MEDIA_ERROR:
+ /* physically adjacent errors can merge */
+ if (existing->daddr + existing->bbcount == new->daddr) {
+ existing->bbcount += new->bbcount;
+ return true;
+ }
+ if (new->daddr + new->bbcount == existing->daddr) {
+ existing->daddr = new->daddr;
+ existing->bbcount += new->bbcount;
+ return true;
+ }
+ return false;
+
+ case XFS_HEALTHMON_BUFREAD:
+ case XFS_HEALTHMON_BUFWRITE:
+ case XFS_HEALTHMON_DIOREAD:
+ case XFS_HEALTHMON_DIOWRITE:
+ case XFS_HEALTHMON_DATALOST:
+ /* logically adjacent file ranges can merge */
+ if (existing->fino != new->fino || existing->fgen != new->fgen)
+ return false;
+
+ if (existing->fpos + existing->flen == new->fpos) {
+ existing->flen += new->flen;
+ return true;
+ }
+
+ if (new->fpos + new->flen == existing->fpos) {
+ existing->fpos = new->fpos;
+ existing->flen += new->flen;
+ return true;
+ }
+ return false;
+ }
+
+ return false;
+}
+
+/*
+ * Insert an event onto the start of the queue.
+ *
+ * Stamps the event with the current coarse wall-clock time in nanoseconds,
+ * makes it the new head of the list (and the tail too if the queue was
+ * empty), bumps the event counters, and wakes anyone sleeping on hm->wait.
+ * NOTE(review): callers are presumably expected to hold hm->lock, as
+ * xfs_healthmon_push does around __xfs_healthmon_push -- confirm.
+ */
+static inline void
+__xfs_healthmon_insert(
+	struct xfs_healthmon		*hm,
+	struct xfs_healthmon_event	*event)
+{
+	struct timespec64		now;
+
+	ktime_get_coarse_real_ts64(&now);
+	event->time_ns = (now.tv_sec * NSEC_PER_SEC) + now.tv_nsec;
+
+	/*
+	 * The new event must always become the head.  Updating first_event
+	 * only when the queue is empty would leave the event unlinked (and
+	 * leaked) whenever other events are already queued.
+	 */
+	event->next = hm->first_event;
+	hm->first_event = event;
+	if (!hm->last_event)
+		hm->last_event = event;
+	xfs_healthmon_bump_events(hm);
+	wake_up(&hm->wait);
+
+	trace_xfs_healthmon_insert(hm, event);
+}
+
+/*
+ * Push an event onto the end of the queue.
+ *
+ * Stamps the event with the current coarse wall-clock time in nanoseconds,
+ * links it after the current tail (or makes it the head of an empty queue),
+ * bumps the event counters, and wakes anyone sleeping on hm->wait.
+ */
+static inline void
+__xfs_healthmon_push(
+ struct xfs_healthmon *hm,
+ struct xfs_healthmon_event *event)
+{
+ struct timespec64 now;
+
+ ktime_get_coarse_real_ts64(&now);
+ event->time_ns = (now.tv_sec * NSEC_PER_SEC) + now.tv_nsec;
+
+ if (!hm->first_event)
+ hm->first_event = event;
+ if (hm->last_event)
+ hm->last_event->next = event;
+ hm->last_event = event;
+ event->next = NULL;
+ xfs_healthmon_bump_events(hm);
+ wake_up(&hm->wait);
+
+ trace_xfs_healthmon_push(hm, event);
+}
+
+/*
+ * Deal with any previously lost events by reporting them as a LOST event
+ * carrying hm->lost_prev_event, either merged into the newest queued event or
+ * pushed as a new event.
+ *
+ * Returns 0 and zeroes hm->lost_prev_event on success.  Returns -ENOMEM,
+ * leaving the counter untouched, if the queue is already at
+ * XFS_HEALTHMON_MAX_EVENTS or the allocation fails.
+ */
+static int
+xfs_healthmon_clear_lost_prev(
+ struct xfs_healthmon *hm)
+{
+ struct xfs_healthmon_event lost_event = {
+ .type = XFS_HEALTHMON_LOST,
+ .domain = XFS_HEALTHMON_MOUNT,
+ .lostcount = hm->lost_prev_event,
+ };
+ struct xfs_healthmon_event *event = NULL;
+
+ if (xfs_healthmon_merge_events(hm->last_event, &lost_event)) {
+ trace_xfs_healthmon_merge(hm, hm->last_event);
+ wake_up(&hm->wait);
+ goto cleared;
+ }
+
+ if (hm->events < XFS_HEALTHMON_MAX_EVENTS)
+ event = kmemdup(&lost_event, sizeof(struct xfs_healthmon_event),
+ GFP_NOFS);
+ if (!event)
+ return -ENOMEM;
+
+ __xfs_healthmon_push(hm, event);
+cleared:
+ hm->lost_prev_event = 0;
+ return 0;
+}
+
+/*
+ * Push an event onto the end of the list after dealing with lost events and
+ * possibly full queues.
+ *
+ * @template is duplicated into a heap-allocated event before queueing, so the
+ * caller keeps ownership of the template.  Returns 0 if the event was queued
+ * or merged into the newest queued event, -ESHUTDOWN if the monitor has
+ * already detached from the mount, or -ENOMEM if the event (or a pending
+ * lost-event report) could not be queued.
+ */
+STATIC int
+xfs_healthmon_push(
+ struct xfs_healthmon *hm,
+ const struct xfs_healthmon_event *template)
+{
+ struct xfs_healthmon_event *event = NULL;
+ int error = 0;
+
+ /*
+ * Locklessly check if the health monitor has already detached from the
+ * mount. If so, ignore the event. If we race with deactivation,
+ * we'll queue the event but never send it.
+ */
+ if (hm->mount_cookie == DETACHED_MOUNT_COOKIE)
+ return -ESHUTDOWN;
+
+ /* hm->lock protects the event list and the event counters. */
+ mutex_lock(&hm->lock);
+
+ /* Report previously lost events before we do anything else */
+ if (hm->lost_prev_event) {
+ /*
+ * NOTE(review): if this fails, the new event is dropped without
+ * going through xfs_healthmon_bump_lost -- confirm that the lost
+ * count is meant to exclude it.
+ */
+ error = xfs_healthmon_clear_lost_prev(hm);
+ if (error)
+ goto out_unlock;
+ }
+
+ /* Try to merge with the newest event */
+ if (xfs_healthmon_merge_events(hm->last_event, template)) {
+ trace_xfs_healthmon_merge(hm, hm->last_event);
+ wake_up(&hm->wait);
+ goto out_unlock;
+ }
+
+ /* Only create a heap event object if we're not already at capacity. */
+ if (hm->events < XFS_HEALTHMON_MAX_EVENTS)
+ event = kmemdup(template, sizeof(struct xfs_healthmon_event),
+ GFP_NOFS);
+ if (!event) {
+ /* No memory means we lose the event */
+ trace_xfs_healthmon_lost_event(hm);
+ xfs_healthmon_bump_lost(hm);
+ error = -ENOMEM;
+ goto out_unlock;
+ }
+
+ /* The queue now owns the duplicated event. */
+ __xfs_healthmon_push(hm, event);
+
+out_unlock:
+ mutex_unlock(&hm->lock);
+ return error;
+}
+
+/*
+ * Report that the filesystem is being unmounted, then detach the xfs mount
+ * from this healthmon instance.  Does nothing if xfs_healthmon_get finds no
+ * health monitor for @mp.
+ */
+void
+xfs_healthmon_unmount(
+	struct xfs_mount		*mp)
+{
+	struct xfs_healthmon		*hm = xfs_healthmon_get(mp);
+
+	if (!hm)
+		return;
+
+	trace_xfs_healthmon_report_unmount(hm);
+
+	/*
+	 * Insert the unmount notification at the start of the event queue so
+	 * that userspace knows the filesystem went away as soon as possible.
+	 * There's nothing actionable for userspace after an unmount.  Once
+	 * we've inserted the unmount event, hm no longer owns that event.
+	 *
+	 * The event list and counters are protected by hm->lock, and readers
+	 * or other reporters may be updating the queue concurrently, so take
+	 * the lock around the insertion like every other queue update does.
+	 */
+	mutex_lock(&hm->lock);
+	__xfs_healthmon_insert(hm, hm->unmount_event);
+	hm->unmount_event = NULL;
+	mutex_unlock(&hm->lock);
+
+	xfs_healthmon_detach(hm);
+	xfs_healthmon_put(hm);
+}
+
+/*
+ * Decide which health bits of a metadata event get reported, given the
+ * health mask before (@old_mask) and after (@new_mask) the state change.
+ */
+static inline unsigned int
+metadata_event_mask(
+	struct xfs_healthmon		*hm,
+	enum xfs_healthmon_type		type,
+	unsigned int			old_mask,
+	unsigned int			new_mask)
+{
+	unsigned int			report = 0;
+
+	/* Verbose monitors get everything. */
+	if (hm->verbose)
+		return new_mask;
+
+	switch (type) {
+	case XFS_HEALTHMON_SICK:
+		/* Runtime corruptions are always reported. */
+		report = new_mask;
+		break;
+	case XFS_HEALTHMON_CORRUPT:
+		/* fsck errors are reported only if they are new. */
+		report = new_mask & ~old_mask;
+		break;
+	case XFS_HEALTHMON_HEALTHY:
+		/* Healthy metadata is reported only if it was fixed. */
+		report = new_mask & old_mask;
+		break;
+	default:
+		ASSERT(0);
+		break;
+	}
+
+	return report;
+}
+
+/* Queue a whole-filesystem metadata health event for the health monitor. */
+void
+xfs_healthmon_report_fs(
+	struct xfs_mount		*mp,
+	enum xfs_healthmon_type		type,
+	unsigned int			old_mask,
+	unsigned int			new_mask)
+{
+	struct xfs_healthmon_event	event = {
+		.domain			= XFS_HEALTHMON_FS,
+		.type			= type,
+	};
+	struct xfs_healthmon		*hm;
+	unsigned int			mask;
+
+	hm = xfs_healthmon_get(mp);
+	if (hm == NULL)
+		return;
+
+	/* The XFS_SICK_FS_SECONDARY bits are never reported. */
+	mask = metadata_event_mask(hm, type, old_mask, new_mask);
+	event.fsmask = mask & ~XFS_SICK_FS_SECONDARY;
+	trace_xfs_healthmon_report_fs(hm, old_mask, new_mask, &event);
+
+	/* Don't queue an event that has nothing to say. */
+	if (event.fsmask != 0)
+		xfs_healthmon_push(hm, &event);
+
+	xfs_healthmon_put(hm);
+}
+
+/* Queue an AG or rtgroup metadata health event for the health monitor. */
+void
+xfs_healthmon_report_group(
+	struct xfs_group		*xg,
+	enum xfs_healthmon_type		type,
+	unsigned int			old_mask,
+	unsigned int			new_mask)
+{
+	struct xfs_healthmon_event	event = {
+		.group			= xg->xg_gno,
+		.type			= type,
+	};
+	struct xfs_healthmon		*hm;
+	unsigned int			secondary = 0;
+	bool				known_type = true;
+
+	hm = xfs_healthmon_get(xg->xg_mount);
+	if (hm == NULL)
+		return;
+
+	/* Pick the event domain and the secondary bits to filter out. */
+	switch (xg->xg_type) {
+	case XG_TYPE_AG:
+		event.domain = XFS_HEALTHMON_AG;
+		secondary = XFS_SICK_AG_SECONDARY;
+		break;
+	case XG_TYPE_RTG:
+		event.domain = XFS_HEALTHMON_RTGROUP;
+		secondary = XFS_SICK_RG_SECONDARY;
+		break;
+	default:
+		ASSERT(0);
+		known_type = false;
+		break;
+	}
+
+	/* Unknown group types leave the mask empty, so nothing is queued. */
+	if (known_type)
+		event.grpmask = metadata_event_mask(hm, type, old_mask,
+						    new_mask) & ~secondary;
+
+	trace_xfs_healthmon_report_group(hm, old_mask, new_mask, &event);
+
+	if (event.grpmask != 0)
+		xfs_healthmon_push(hm, &event);
+
+	xfs_healthmon_put(hm);
+}
+
+/* Queue an inode metadata health event for the health monitor. */
+void
+xfs_healthmon_report_inode(
+	struct xfs_inode		*ip,
+	enum xfs_healthmon_type		type,
+	unsigned int			old_mask,
+	unsigned int			new_mask)
+{
+	struct xfs_healthmon_event	event = {
+		.domain			= XFS_HEALTHMON_INODE,
+		.type			= type,
+		.gen			= VFS_I(ip)->i_generation,
+		.ino			= ip->i_ino,
+	};
+	struct xfs_healthmon		*hm;
+	unsigned int			mask;
+
+	hm = xfs_healthmon_get(ip->i_mount);
+	if (hm == NULL)
+		return;
+
+	/* The XFS_SICK_INO_SECONDARY bits are never reported. */
+	mask = metadata_event_mask(hm, type, old_mask, new_mask);
+	event.imask = mask & ~XFS_SICK_INO_SECONDARY;
+	trace_xfs_healthmon_report_inode(hm, old_mask, event.imask, &event);
+
+	/* Don't queue an event that has nothing to say. */
+	if (event.imask != 0)
+		xfs_healthmon_push(hm, &event);
+
+	xfs_healthmon_put(hm);
+}
+
+/* Queue a filesystem shutdown event carrying the shutdown @flags. */
+void
+xfs_healthmon_report_shutdown(
+	struct xfs_mount		*mp,
+	uint32_t			flags)
+{
+	struct xfs_healthmon_event	event = {
+		.domain			= XFS_HEALTHMON_MOUNT,
+		.type			= XFS_HEALTHMON_SHUTDOWN,
+		.flags			= flags,
+	};
+	struct xfs_healthmon		*hm;
+
+	hm = xfs_healthmon_get(mp);
+	if (hm == NULL)
+		return;
+
+	trace_xfs_healthmon_report_shutdown(hm, flags);
+
+	xfs_healthmon_push(hm, &event);
+	xfs_healthmon_put(hm);
+}
+
+/* Translate an xfs device identifier into the matching event domain. */
+static inline enum xfs_healthmon_domain
+media_error_domain(
+	enum xfs_device			fdev)
+{
+	if (fdev == XFS_DEV_DATA)
+		return XFS_HEALTHMON_DATADEV;
+	if (fdev == XFS_DEV_LOG)
+		return XFS_HEALTHMON_LOGDEV;
+	if (fdev == XFS_DEV_RT)
+		return XFS_HEALTHMON_RTDEV;
+
+	/* Callers are only expected to pass the devices handled above. */
+	ASSERT(0);
+	return 0;
+}
+
+/*
+ * Queue a media error event covering @bbcount units starting at @daddr on
+ * device @fdev.
+ */
+void
+xfs_healthmon_report_media(
+	struct xfs_mount		*mp,
+	enum xfs_device			fdev,
+	xfs_daddr_t			daddr,
+	uint64_t			bbcount)
+{
+	enum xfs_healthmon_domain	domain = media_error_domain(fdev);
+	struct xfs_healthmon_event	event = {
+		.domain			= domain,
+		.type			= XFS_HEALTHMON_MEDIA_ERROR,
+		.bbcount		= bbcount,
+		.daddr			= daddr,
+	};
+	struct xfs_healthmon		*hm;
+
+	hm = xfs_healthmon_get(mp);
+	if (hm == NULL)
+		return;
+
+	trace_xfs_healthmon_report_media(hm, fdev, &event);
+
+	xfs_healthmon_push(hm, &event);
+	xfs_healthmon_put(hm);
+}
+
+/* Translate a generic fserror type into the matching healthmon event type. */
+static inline enum xfs_healthmon_type
+file_ioerr_type(
+	enum fserror_type		action)
+{
+	if (action == FSERR_BUFFERED_READ)
+		return XFS_HEALTHMON_BUFREAD;
+	if (action == FSERR_BUFFERED_WRITE)
+		return XFS_HEALTHMON_BUFWRITE;
+	if (action == FSERR_DIRECTIO_READ)
+		return XFS_HEALTHMON_DIOREAD;
+	if (action == FSERR_DIRECTIO_WRITE)
+		return XFS_HEALTHMON_DIOWRITE;
+	if (action == FSERR_DATA_LOST)
+		return XFS_HEALTHMON_DATALOST;
+
+	/* FSERR_METADATA is filtered out by xfs_fs_report_error */
+	ASSERT(0);
+	return -1;
+}
+
+/* Queue a file range I/O error event described by the fserror event @p. */
+void
+xfs_healthmon_report_file_ioerror(
+	struct xfs_inode		*ip,
+	const struct fserror_event	*p)
+{
+	enum xfs_healthmon_type		type = file_ioerr_type(p->type);
+	struct xfs_healthmon_event	event = {
+		.domain			= XFS_HEALTHMON_FILERANGE,
+		.type			= type,
+		.fino			= ip->i_ino,
+		.fgen			= VFS_I(ip)->i_generation,
+		.fpos			= p->pos,
+		.flen			= p->len,
+		/* negate the error number before sending it to userspace */
+		.error			= -p->error,
+	};
+	struct xfs_healthmon		*hm;
+
+	hm = xfs_healthmon_get(ip->i_mount);
+	if (hm == NULL)
+		return;
+
+	trace_xfs_healthmon_report_file_ioerror(hm, p);
+
+	xfs_healthmon_push(hm, &event);
+	xfs_healthmon_put(hm);
+}
+
+/* Rewind both output buffer cursors to the start of the buffer. */
+static inline void
+xfs_healthmon_reset_outbuf(
+	struct xfs_healthmon		*hm)
+{
+	hm->bufhead = hm->buftail = 0;
+}
+
+/* One entry of a flag translation table used by __map_flags. */
+struct flags_map {
+ /* flag bit(s) to test for in the input value */
+ unsigned int in_mask;
+ /* flag bit(s) to set in the output when in_mask matches */
+ unsigned int out_mask;
+};
+
+/* Translate SHUTDOWN_* flags into XFS_HEALTH_SHUTDOWN_* flags. */
+static const struct flags_map shutdown_map[] = {
+ { SHUTDOWN_META_IO_ERROR, XFS_HEALTH_SHUTDOWN_META_IO_ERROR },
+ { SHUTDOWN_LOG_IO_ERROR, XFS_HEALTH_SHUTDOWN_LOG_IO_ERROR },
+ { SHUTDOWN_FORCE_UMOUNT, XFS_HEALTH_SHUTDOWN_FORCE_UMOUNT },
+ { SHUTDOWN_CORRUPT_INCORE, XFS_HEALTH_SHUTDOWN_CORRUPT_INCORE },
+ { SHUTDOWN_CORRUPT_ONDISK, XFS_HEALTH_SHUTDOWN_CORRUPT_ONDISK },
+ { SHUTDOWN_DEVICE_REMOVED, XFS_HEALTH_SHUTDOWN_DEVICE_REMOVED },
+};
+
+/*
+ * Translate @flags through a table of @array_len entries: for every entry
+ * whose in_mask intersects @flags, OR the entry's out_mask into the result.
+ */
+static inline unsigned int
+__map_flags(
+	const struct flags_map		*map,
+	size_t				array_len,
+	unsigned int			flags)
+{
+	unsigned int			out = 0;
+	size_t				i;
+
+	for (i = 0; i < array_len; i++) {
+		if (map[i].in_mask & flags)
+			out |= map[i].out_mask;
+	}
+
+	return out;
+}
+
+/*
+ * Translate flags through a flags_map table.  @map must be an actual array
+ * (not a pointer) because its length is taken with ARRAY_SIZE.
+ */
+#define map_flags(map, flags) __map_flags((map), ARRAY_SIZE(map), (flags))
+
+/* Translate SHUTDOWN_* flags into their XFS_HEALTH_SHUTDOWN_* equivalents. */
+static inline unsigned int
+shutdown_mask(
+	unsigned int			in)
+{
+	return __map_flags(shutdown_map, ARRAY_SIZE(shutdown_map), in);
+}
+
+/*
+ * Map enum xfs_healthmon_domain values (the array index) to the
+ * XFS_HEALTH_MONITOR_DOMAIN_* codes written into formatted events.
+ */
+static const unsigned int domain_map[] = {
+ [XFS_HEALTHMON_MOUNT] = XFS_HEALTH_MONITOR_DOMAIN_MOUNT,
+ [XFS_HEALTHMON_FS] = XFS_HEALTH_MONITOR_DOMAIN_FS,
+ [XFS_HEALTHMON_AG] = XFS_HEALTH_MONITOR_DOMAIN_AG,
+ [XFS_HEALTHMON_INODE] = XFS_HEALTH_MONITOR_DOMAIN_INODE,
+ [XFS_HEALTHMON_RTGROUP] = XFS_HEALTH_MONITOR_DOMAIN_RTGROUP,
+ [XFS_HEALTHMON_DATADEV] = XFS_HEALTH_MONITOR_DOMAIN_DATADEV,
+ [XFS_HEALTHMON_RTDEV] = XFS_HEALTH_MONITOR_DOMAIN_RTDEV,
+ [XFS_HEALTHMON_LOGDEV] = XFS_HEALTH_MONITOR_DOMAIN_LOGDEV,
+ [XFS_HEALTHMON_FILERANGE] = XFS_HEALTH_MONITOR_DOMAIN_FILERANGE,
+};
+
+/*
+ * Map enum xfs_healthmon_type values (the array index) to the
+ * XFS_HEALTH_MONITOR_TYPE_* codes written into formatted events.
+ */
+static const unsigned int type_map[] = {
+ [XFS_HEALTHMON_RUNNING] = XFS_HEALTH_MONITOR_TYPE_RUNNING,
+ [XFS_HEALTHMON_LOST] = XFS_HEALTH_MONITOR_TYPE_LOST,
+ [XFS_HEALTHMON_SICK] = XFS_HEALTH_MONITOR_TYPE_SICK,
+ [XFS_HEALTHMON_CORRUPT] = XFS_HEALTH_MONITOR_TYPE_CORRUPT,
+ [XFS_HEALTHMON_HEALTHY] = XFS_HEALTH_MONITOR_TYPE_HEALTHY,
+ [XFS_HEALTHMON_UNMOUNT] = XFS_HEALTH_MONITOR_TYPE_UNMOUNT,
+ [XFS_HEALTHMON_SHUTDOWN] = XFS_HEALTH_MONITOR_TYPE_SHUTDOWN,
+ [XFS_HEALTHMON_MEDIA_ERROR] = XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR,
+ [XFS_HEALTHMON_BUFREAD] = XFS_HEALTH_MONITOR_TYPE_BUFREAD,
+ [XFS_HEALTHMON_BUFWRITE] = XFS_HEALTH_MONITOR_TYPE_BUFWRITE,
+ [XFS_HEALTHMON_DIOREAD] = XFS_HEALTH_MONITOR_TYPE_DIOREAD,
+ [XFS_HEALTHMON_DIOWRITE] = XFS_HEALTH_MONITOR_TYPE_DIOWRITE,
+ [XFS_HEALTHMON_DATALOST] = XFS_HEALTH_MONITOR_TYPE_DATALOST,
+};
+
+/*
+ * Render event as a V0 structure and append it to the output buffer at
+ * hm->bufhead.
+ *
+ * Returns 0 on success, or -EFSCORRUPTED if the event's domain or type is
+ * outside the translation tables.  If the formatted record does not fit in
+ * the remaining buffer space, the record is not copied and 0 is still
+ * returned.
+ */
+STATIC int
+xfs_healthmon_format_v0(
+ struct xfs_healthmon *hm,
+ const struct xfs_healthmon_event *event)
+{
+ struct xfs_health_monitor_event hme = {
+ .time_ns = event->time_ns,
+ };
+
+ trace_xfs_healthmon_format(hm, event);
+
+ /* Never index the translation tables with an out of range value. */
+ if (event->domain < 0 || event->domain >= ARRAY_SIZE(domain_map) ||
+ event->type < 0 || event->type >= ARRAY_SIZE(type_map))
+ return -EFSCORRUPTED;
+
+ hme.domain = domain_map[event->domain];
+ hme.type = type_map[event->type];
+
+ /* fill in the event-specific details */
+ switch (event->domain) {
+ case XFS_HEALTHMON_MOUNT:
+ /* Only lost and shutdown events carry a payload in this domain. */
+ switch (event->type) {
+ case XFS_HEALTHMON_LOST:
+ hme.e.lost.count = event->lostcount;
+ break;
+ case XFS_HEALTHMON_SHUTDOWN:
+ hme.e.shutdown.reasons = shutdown_mask(event->flags);
+ break;
+ default:
+ break;
+ }
+ break;
+ case XFS_HEALTHMON_FS:
+ hme.e.fs.mask = xfs_healthmon_fs_mask(event->fsmask);
+ break;
+ case XFS_HEALTHMON_RTGROUP:
+ hme.e.group.mask = xfs_healthmon_rtgroup_mask(event->grpmask);
+ hme.e.group.gno = event->group;
+ break;
+ case XFS_HEALTHMON_AG:
+ hme.e.group.mask = xfs_healthmon_perag_mask(event->grpmask);
+ hme.e.group.gno = event->group;
+ break;
+ case XFS_HEALTHMON_INODE:
+ hme.e.inode.mask = xfs_healthmon_inode_mask(event->imask);
+ hme.e.inode.ino = event->ino;
+ hme.e.inode.gen = event->gen;
+ break;
+ case XFS_HEALTHMON_DATADEV:
+ case XFS_HEALTHMON_LOGDEV:
+ case XFS_HEALTHMON_RTDEV:
+ hme.e.media.daddr = event->daddr;
+ hme.e.media.bbcount = event->bbcount;
+ break;
+ case XFS_HEALTHMON_FILERANGE:
+ hme.e.filerange.ino = event->fino;
+ hme.e.filerange.gen = event->fgen;
+ hme.e.filerange.pos = event->fpos;
+ hme.e.filerange.len = event->flen;
+ /* always hand a non-negative error number to userspace */
+ hme.e.filerange.error = abs(event->error);
+ break;
+ default:
+ break;
+ }
+
+ /* The caller is expected to have checked for space beforehand. */
+ ASSERT(hm->bufhead + sizeof(hme) <= hm->bufsize);
+
+ /* copy formatted object to the outbuf */
+ if (hm->bufhead + sizeof(hme) <= hm->bufsize) {
+ memcpy(hm->buffer + hm->bufhead, &hme, sizeof(hme));
+ hm->bufhead += sizeof(hme);
+ }
+
+ return 0;
+}
+
+/* Number of formatted bytes in the outbuf that haven't been copied out yet. */
+static inline size_t
+xfs_healthmon_outbuf_bytes(
+	struct xfs_healthmon		*hm)
+{
+	return hm->bufhead > hm->buftail ? hm->bufhead - hm->buftail : 0;
+}
+
+/*
+ * Is there anything for a reader to consume?  That is the case when the
+ * monitor has detached from the mount, when events are queued, or when the
+ * outbuf still holds unread bytes.
+ */
+static inline bool
+xfs_healthmon_has_eventdata(
+	struct xfs_healthmon		*hm)
+{
+	/*
+	 * Once the health monitor is detached from the xfs_mount, reads must
+	 * return 0 bytes even if there are no events, because userspace
+	 * interprets that as EOF.  If we race with deactivation, read_iter
+	 * will take the necessary locks to discover that there are no events
+	 * to send.
+	 */
+	if (hm->mount_cookie == DETACHED_MOUNT_COOKIE)
+		return true;
+
+	/* Events waiting to be formatted into the buffer? */
+	if (hm->events > 0)
+		return true;
+
+	/* Otherwise, only unread bytes in the buffer count. */
+	return xfs_healthmon_outbuf_bytes(hm) > 0;
+}
+
+/*
+ * Copy as much of the pending outbuf contents as possible to the iov iter.
+ * Returns the number of bytes copied, or -EFAULT if bytes were pending but
+ * none could be copied.
+ */
+STATIC ssize_t
+xfs_healthmon_copybuf(
+	struct xfs_healthmon		*hm,
+	struct iov_iter			*to)
+{
+	size_t				pending;
+	size_t				copied = 0;
+
+	trace_xfs_healthmon_copybuf(hm, to);
+
+	pending = xfs_healthmon_outbuf_bytes(hm);
+	if (pending > 0) {
+		copied = copy_to_iter(hm->buffer + hm->buftail, pending, to);
+		if (copied == 0)
+			return -EFAULT;
+
+		hm->buftail += copied;
+	}
+
+	/*
+	 * With no live data left in the buffer, rewind the cursors so that
+	 * the whole buffer is available for formatting again.
+	 */
+	if (!xfs_healthmon_outbuf_bytes(hm))
+		xfs_healthmon_reset_outbuf(hm);
+	return copied;
+}
+
+/*
+ * Return a health monitoring event for formatting into the output buffer if
+ * there's enough space in the outbuf and an event waiting for us.  Caller
+ * must hold i_rwsem on the healthmon file.
+ *
+ * The returned event has been unlinked from the queue.  Returns NULL if the
+ * outbuf has no room for another formatted record or the queue is empty.
+ */
+static inline struct xfs_healthmon_event *
+xfs_healthmon_format_pop(
+	struct xfs_healthmon		*hm)
+{
+	struct xfs_healthmon_event	*event;
+
+	/*
+	 * Size the space check by the formatted record, because that is what
+	 * xfs_healthmon_format_v0 copies into the outbuf.  Checking against
+	 * the size of the in-kernel event instead could dequeue an event that
+	 * then doesn't fit and never reaches userspace.
+	 */
+	if (hm->bufhead + sizeof(struct xfs_health_monitor_event) >
+	    hm->bufsize)
+		return NULL;
+
+	mutex_lock(&hm->lock);
+	event = hm->first_event;
+	if (event) {
+		/* Popping the only event empties the queue. */
+		if (hm->last_event == event)
+			hm->last_event = NULL;
+		hm->first_event = event->next;
+		hm->events--;
+
+		trace_xfs_healthmon_pop(hm, event);
+	}
+	mutex_unlock(&hm->lock);
+	return event;
+}
+
+/*
+ * Allocate the formatting buffer.  The size is derived from the size of the
+ * user's read buffer, clamped between PAGE_SIZE and XFS_HEALTHMON_MAX_OUTBUF.
+ * Returns 0 on success or -ENOMEM.
+ */
+STATIC int
+xfs_healthmon_alloc_outbuf(
+	struct xfs_healthmon		*hm,
+	size_t				user_bufsize)
+{
+	size_t				bufsize =
+		min(XFS_HEALTHMON_MAX_OUTBUF, max(PAGE_SIZE, user_bufsize));
+	void				*outbuf;
+
+	outbuf = kzalloc(bufsize, GFP_KERNEL);
+
+	/* Retry with a single page if the larger allocation failed. */
+	if (!outbuf && bufsize != PAGE_SIZE) {
+		bufsize = PAGE_SIZE;
+		outbuf = kzalloc(bufsize, GFP_KERNEL);
+	}
+	if (!outbuf)
+		return -ENOMEM;
+
+	hm->buftail = 0;
+	hm->bufhead = 0;
+	hm->bufsize = bufsize;
+	hm->buffer = outbuf;
+
+	return 0;
+}
+
+/*
+ * Convey queued event data to userspace. First copy any remaining bytes in
+ * the outbuf, then format the oldest event into the outbuf and copy that too.
+ *
+ * Returns the number of bytes copied if any were copied; otherwise the last
+ * status, which is 0 if there was nothing to send or a negative errno.
+ * Nonblocking files get -EAGAIN when no data is ready or the inode lock is
+ * contended.
+ */
+STATIC ssize_t
+xfs_healthmon_read_iter(
+ struct kiocb *iocb,
+ struct iov_iter *to)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ struct xfs_healthmon *hm = file->private_data;
+ struct xfs_healthmon_event *event;
+ size_t copied = 0;
+ ssize_t ret = 0;
+
+ if (file->f_flags & O_NONBLOCK) {
+ /* Never sleep: bail out if there's no data or the lock is taken. */
+ if (!xfs_healthmon_has_eventdata(hm) || !inode_trylock(inode))
+ return -EAGAIN;
+ } else {
+ /* Sleep (interruptibly) until there is something to read. */
+ ret = wait_event_interruptible(hm->wait,
+ xfs_healthmon_has_eventdata(hm));
+ if (ret)
+ return ret;
+
+ inode_lock(inode);
+ }
+
+ /* The outbuf is allocated lazily, sized from the first read. */
+ if (hm->bufsize == 0) {
+ ret = xfs_healthmon_alloc_outbuf(hm, iov_iter_count(to));
+ if (ret)
+ goto out_unlock;
+ }
+
+ trace_xfs_healthmon_read_start(hm);
+
+ /*
+ * If there's anything left in the output buffer, copy that before
+ * formatting more events.
+ */
+ ret = xfs_healthmon_copybuf(hm, to);
+ if (ret < 0)
+ goto out_unlock;
+ copied += ret;
+
+ while (iov_iter_count(to) > 0) {
+ /* Format the next events into the outbuf until it's full. */
+ while ((event = xfs_healthmon_format_pop(hm)) != NULL) {
+ ret = xfs_healthmon_format_v0(hm, event);
+ /* The popped event is freed whether or not formatting worked. */
+ kfree(event);
+ if (ret)
+ goto out_unlock;
+ }
+
+ /* Copy anything formatted into outbuf to userspace */
+ ret = xfs_healthmon_copybuf(hm, to);
+ if (ret <= 0)
+ break;
+
+ copied += ret;
+ }
+
+out_unlock:
+ trace_xfs_healthmon_read_finish(hm);
+ inode_unlock(inode);
+ /* Report progress in preference to an error hit after copying data. */
+ return copied ?: ret;
+}
+
+/* Poll handler: report EPOLLIN when there is event data to read. */
+STATIC __poll_t
+xfs_healthmon_poll(
+	struct file			*file,
+	struct poll_table_struct	*wait)
+{
+	struct xfs_healthmon		*hm = file->private_data;
+
+	poll_wait(file, &hm->wait, wait);
+
+	if (!xfs_healthmon_has_eventdata(hm))
+		return 0;
+	return EPOLLIN;
+}
+
+/* Release handler: detach the monitor and drop the file's reference. */
+STATIC int
+xfs_healthmon_release(
+	struct inode			*inode,
+	struct file			*file)
+{
+	struct xfs_healthmon		*monitor = file->private_data;
+
+	trace_xfs_healthmon_release(monitor);
+
+	/*
+	 * Userspace processes can terminate at any time and for any reason,
+	 * so the healthmon file may be closed before the filesystem unmounts.
+	 * Null out xfs_mount::m_healthmon so that another process can create
+	 * another health monitor file.
+	 */
+	xfs_healthmon_detach(monitor);
+
+	/*
+	 * Only read and poll use the waiter, so there shouldn't be any
+	 * readers left, but wake them all up just in case.
+	 */
+	wake_up_all(&monitor->wait);
+
+	xfs_healthmon_put(monitor);
+	return 0;
+}
+
+/*
+ * Check the ioctl parameters: no unknown flags, a V0 event format, and
+ * all-zero padding.
+ */
+static inline bool
+xfs_healthmon_validate(
+	const struct xfs_health_monitor	*hmo)
+{
+	return !(hmo->flags & ~XFS_HEALTH_MONITOR_ALL) &&
+	       hmo->format == XFS_HEALTH_MONITOR_FMT_V0 &&
+	       !memchr_inv(&hmo->pad, 0, sizeof(hmo->pad));
+}
+
+/* Describe the health monitor behind this fd for the fdinfo file. */
+static void
+xfs_healthmon_show_fdinfo(
+	struct seq_file			*m,
+	struct file			*file)
+{
+	struct xfs_healthmon		*hm = file->private_data;
+	const char			*state;
+
+	mutex_lock(&hm->lock);
+	if (hm->mount_cookie == DETACHED_MOUNT_COOKIE)
+		state = "dead";
+	else
+		state = "alive";
+	seq_printf(m, "state:\t%s\ndev:\t%d:%d\nformat:\tv0\nevents:\t%llu\nlost:\t%llu\n",
+			state,
+			MAJOR(hm->dev), MINOR(hm->dev),
+			hm->total_events,
+			hm->total_lost);
+	mutex_unlock(&hm->lock);
+}
+
+/*
+ * Apply new settings to an existing health monitor.  Returns 0 on success,
+ * -EFAULT if the arguments can't be read, or -EINVAL if they don't validate.
+ */
+STATIC long
+xfs_healthmon_reconfigure(
+	struct file			*file,
+	unsigned int			cmd,
+	void __user			*arg)
+{
+	struct xfs_healthmon		*hm = file->private_data;
+	struct xfs_health_monitor	hmo;
+	bool				verbose;
+
+	if (copy_from_user(&hmo, arg, sizeof(hmo)))
+		return -EFAULT;
+
+	if (!xfs_healthmon_validate(&hmo))
+		return -EINVAL;
+
+	/* Verbosity is the only setting that is updated here. */
+	verbose = (hmo.flags & XFS_HEALTH_MONITOR_VERBOSE) != 0;
+
+	mutex_lock(&hm->lock);
+	hm->verbose = verbose;
+	mutex_unlock(&hm->lock);
+
+	return 0;
+}
+
+/*
+ * Check whether the fd passed in by userspace refers to a file on the
+ * filesystem that this monitor watches.  Returns 0 if so, -ESTALE if not,
+ * -EBADF for a bad fd, -EINVAL for nonzero flags, or -EFAULT if the arguments
+ * can't be read.
+ */
+STATIC long
+xfs_healthmon_file_on_monitored_fs(
+	struct file			*file,
+	unsigned int			cmd,
+	void __user			*arg)
+{
+	struct xfs_health_file_on_monitored_fs hms;
+	struct xfs_healthmon		*hm = file->private_data;
+	struct inode			*hms_inode;
+	long				ret = 0;
+
+	if (copy_from_user(&hms, arg, sizeof(hms)))
+		return -EFAULT;
+
+	if (hms.flags)
+		return -EINVAL;
+
+	CLASS(fd, hms_fd)(hms.fd);
+	if (fd_empty(hms_fd))
+		return -EBADF;
+
+	hms_inode = file_inode(fd_file(hms_fd));
+
+	/* Compare the file's superblock against the monitor's mount cookie. */
+	mutex_lock(&hm->lock);
+	if (hm->mount_cookie != (uintptr_t)hms_inode->i_sb)
+		ret = -ESTALE;
+	mutex_unlock(&hm->lock);
+
+	return ret;
+}
+
+/* Dispatch ioctls issued against a health monitor file. */
+STATIC long
+xfs_healthmon_ioctl(
+	struct file			*file,
+	unsigned int			cmd,
+	unsigned long			p)
+{
+	void __user			*arg = (void __user *)p;
+
+	if (cmd == XFS_IOC_HEALTH_MONITOR)
+		return xfs_healthmon_reconfigure(file, cmd, arg);
+	if (cmd == XFS_IOC_HEALTH_FD_ON_MONITORED_FS)
+		return xfs_healthmon_file_on_monitored_fs(file, cmd, arg);
+
+	/* Anything else is not an ioctl that this file understands. */
+	return -ENOTTY;
+}
+
+/*
+ * File operations for the anonymous health monitor file created by
+ * xfs_ioc_health_monitor.  The file supports read, poll, ioctl and fdinfo;
+ * no write handler is provided.
+ */
+static const struct file_operations xfs_healthmon_fops = {
+ .owner = THIS_MODULE,
+ .show_fdinfo = xfs_healthmon_show_fdinfo,
+ .read_iter = xfs_healthmon_read_iter,
+ .poll = xfs_healthmon_poll,
+ .release = xfs_healthmon_release,
+ .unlocked_ioctl = xfs_healthmon_ioctl,
+};
+
+/*
+ * Create a health monitoring file. Returns an index to the fd table or a
+ * negative errno.
+ *
+ * @file must be open on the root directory of the filesystem to monitor, and
+ * @arg points to the userspace struct xfs_health_monitor configuration.
+ * Fails with -EPERM if the caller is not sufficiently privileged, -EFAULT if
+ * @arg cannot be read, -EINVAL if the configuration does not validate,
+ * -ENOMEM on allocation failure, or whatever error xfs_healthmon_attach or
+ * anon_inode_getfd return.
+ */
+long
+xfs_ioc_health_monitor(
+ struct file *file,
+ struct xfs_health_monitor __user *arg)
+{
+ struct xfs_health_monitor hmo;
+ struct xfs_healthmon_event *running_event;
+ struct xfs_healthmon *hm;
+ struct xfs_inode *ip = XFS_I(file_inode(file));
+ struct xfs_mount *mp = ip->i_mount;
+ int ret;
+
+ /*
+ * The only intended user of the health monitoring system should be the
+ * xfs_healer daemon running on behalf of the whole filesystem in the
+ * initial user namespace. IOWs, we don't allow unprivileged userspace
+ * (they can use fsnotify) nor do we allow containers.
+ */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (ip->i_ino != mp->m_sb.sb_rootino)
+ return -EPERM;
+ if (current_user_ns() != &init_user_ns)
+ return -EPERM;
+
+ if (copy_from_user(&hmo, arg, sizeof(hmo)))
+ return -EFAULT;
+
+ if (!xfs_healthmon_validate(&hmo))
+ return -EINVAL;
+
+ hm = kzalloc(sizeof(*hm), GFP_KERNEL);
+ if (!hm)
+ return -ENOMEM;
+ hm->dev = mp->m_super->s_dev;
+ /* This initial reference is handed to the file if creation succeeds. */
+ refcount_set(&hm->ref, 1);
+
+ mutex_init(&hm->lock);
+ init_waitqueue_head(&hm->wait);
+
+ if (hmo.flags & XFS_HEALTH_MONITOR_VERBOSE)
+ hm->verbose = true;
+
+ /* Queue up the first event that lets the client know we're running. */
+ running_event = kzalloc(sizeof(struct xfs_healthmon_event), GFP_NOFS);
+ if (!running_event) {
+ ret = -ENOMEM;
+ goto out_hm;
+ }
+ running_event->type = XFS_HEALTHMON_RUNNING;
+ running_event->domain = XFS_HEALTHMON_MOUNT;
+ /* hm is not attached to the mount or a file yet, so nobody else sees it. */
+ __xfs_healthmon_insert(hm, running_event);
+
+ /*
+ * Preallocate the unmount event so that we can't fail to notify the
+ * filesystem later. This is key for triggering fast exit of the
+ * xfs_healer daemon.
+ */
+ hm->unmount_event = kzalloc(sizeof(struct xfs_healthmon_event),
+ GFP_NOFS);
+ if (!hm->unmount_event) {
+ ret = -ENOMEM;
+ goto out_hm;
+ }
+ hm->unmount_event->type = XFS_HEALTHMON_UNMOUNT;
+ hm->unmount_event->domain = XFS_HEALTHMON_MOUNT;
+
+ /*
+ * Try to attach this health monitor to the xfs_mount. The monitor is
+ * considered live and will receive events if this succeeds.
+ */
+ ret = xfs_healthmon_attach(mp, hm);
+ if (ret)
+ goto out_hm;
+
+ /*
+ * Create the anonymous file and install a fd for it. If it succeeds,
+ * the file owns hm and can go away at any time, so we must not access
+ * it again. This must go last because we can't undo a fd table
+ * installation.
+ */
+ ret = anon_inode_getfd("xfs_healthmon", &xfs_healthmon_fops, hm,
+ O_CLOEXEC | O_RDONLY);
+ if (ret < 0)
+ goto out_mp;
+
+ trace_xfs_healthmon_create(mp->m_super->s_dev, hmo.flags, hmo.format);
+
+ return ret;
+
+ /* Unwind in reverse order of setup. */
+out_mp:
+ xfs_healthmon_detach(hm);
+out_hm:
+ ASSERT(refcount_read(&hm->ref) == 1);
+ xfs_healthmon_put(hm);
+ return ret;
+}
diff --git a/fs/xfs/xfs_healthmon.h b/fs/xfs/xfs_healthmon.h
new file mode 100644
index 000000000000..0e936507037f
--- /dev/null
+++ b/fs/xfs/xfs_healthmon.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2024-2026 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_HEALTHMON_H__
+#define __XFS_HEALTHMON_H__
+
+/*
+ * In-kernel state of one health monitor: the queue of pending events, the
+ * output buffer used to format them for read(), and bookkeeping counters.
+ */
+struct xfs_healthmon {
+ /*
+ * Weak reference to the xfs filesystem that is being monitored. It
+ * will be set to zero when the filesystem detaches from the monitor.
+ * Do not dereference this pointer.
+ */
+ uintptr_t mount_cookie;
+
+ /*
+ * Device number of the filesystem being monitored. This is for
+ * consistent tracing even after unmount.
+ */
+ dev_t dev;
+
+ /*
+ * Reference count of this structure. The open healthmon fd holds one
+ * ref, the xfs_mount holds another ref if it points to this object,
+ * and running event handlers hold their own refs.
+ */
+ refcount_t ref;
+
+ /* lock for event list and event counters */
+ struct mutex lock;
+
+ /* singly linked list of event objects, oldest first */
+ struct xfs_healthmon_event *first_event;
+ struct xfs_healthmon_event *last_event;
+
+ /* preallocated event for unmount */
+ struct xfs_healthmon_event *unmount_event;
+
+ /* number of events in the list */
+ unsigned int events;
+
+ /* do we want all events? */
+ bool verbose:1;
+
+ /* waiter so read/poll can sleep until the arrival of events */
+ struct wait_queue_head wait;
+
+ /*
+ * Buffer for formatting events for a read_iter call. Events are
+ * formatted into the buffer at bufhead, and buftail determines where
+ * to start a copy_to_iter to get those events to userspace. All buffer
+ * fields are protected by inode_lock.
+ */
+ char *buffer;
+ size_t bufsize;
+ size_t bufhead;
+ size_t buftail;
+
+ /* did we lose previous events? */
+ unsigned long long lost_prev_event;
+
+ /* total counts of events observed and lost events */
+ unsigned long long total_events;
+ unsigned long long total_lost;
+};
+
+void xfs_healthmon_unmount(struct xfs_mount *mp);
+
+/* What happened: the kind of event stored in a struct xfs_healthmon_event. */
+enum xfs_healthmon_type {
+ XFS_HEALTHMON_RUNNING, /* monitor running */
+ XFS_HEALTHMON_LOST, /* message lost */
+ XFS_HEALTHMON_UNMOUNT, /* filesystem is unmounting */
+
+ /* filesystem shutdown */
+ XFS_HEALTHMON_SHUTDOWN,
+
+ /* metadata health events */
+ XFS_HEALTHMON_SICK, /* runtime corruption observed */
+ XFS_HEALTHMON_CORRUPT, /* fsck reported corruption */
+ XFS_HEALTHMON_HEALTHY, /* fsck reported healthy structure */
+
+ /* media errors */
+ XFS_HEALTHMON_MEDIA_ERROR,
+
+ /* file range events */
+ XFS_HEALTHMON_BUFREAD,
+ XFS_HEALTHMON_BUFWRITE,
+ XFS_HEALTHMON_DIOREAD,
+ XFS_HEALTHMON_DIOWRITE,
+ XFS_HEALTHMON_DATALOST,
+};
+
+/*
+ * Where it happened: the part of the filesystem that an event refers to.
+ * The domain also selects which member of the event's union is meaningful.
+ */
+enum xfs_healthmon_domain {
+ XFS_HEALTHMON_MOUNT, /* affects the whole fs */
+
+ /* metadata health events */
+ XFS_HEALTHMON_FS, /* main filesystem metadata */
+ XFS_HEALTHMON_AG, /* allocation group metadata */
+ XFS_HEALTHMON_INODE, /* inode metadata */
+ XFS_HEALTHMON_RTGROUP, /* realtime group metadata */
+
+ /* media errors */
+ XFS_HEALTHMON_DATADEV,
+ XFS_HEALTHMON_RTDEV,
+ XFS_HEALTHMON_LOGDEV,
+
+ /* file range events */
+ XFS_HEALTHMON_FILERANGE,
+};
+
+/*
+ * One queued health event.  Events are chained through @next, and the
+ * anonymous union holds the payload for the event's type and domain.
+ */
+struct xfs_healthmon_event {
+ /* next (newer) event in the queue, or NULL for the last one */
+ struct xfs_healthmon_event *next;
+
+ enum xfs_healthmon_type type;
+ enum xfs_healthmon_domain domain;
+
+ /* wall-clock time in nanoseconds, stamped when the event is queued */
+ uint64_t time_ns;
+
+ union {
+ /* lost events */
+ struct {
+ uint64_t lostcount;
+ };
+ /* fs/rt metadata */
+ struct {
+ /* XFS_SICK_* flags */
+ unsigned int fsmask;
+ };
+ /* ag/rtgroup metadata */
+ struct {
+ /* XFS_SICK_(AG|RG)* flags */
+ unsigned int grpmask;
+ unsigned int group;
+ };
+ /* inode metadata */
+ struct {
+ /* XFS_SICK_INO_* flags */
+ unsigned int imask;
+ uint32_t gen;
+ xfs_ino_t ino;
+ };
+ /* shutdown */
+ struct {
+ /* SHUTDOWN_* flags */
+ unsigned int flags;
+ };
+ /* media errors */
+ struct {
+ xfs_daddr_t daddr;
+ uint64_t bbcount;
+ };
+ /* file range events */
+ struct {
+ xfs_ino_t fino;
+ loff_t fpos;
+ uint64_t flen;
+ uint32_t fgen;
+ int error;
+ };
+ };
+};
+
+void xfs_healthmon_report_fs(struct xfs_mount *mp,
+ enum xfs_healthmon_type type, unsigned int old_mask,
+ unsigned int new_mask);
+void xfs_healthmon_report_group(struct xfs_group *xg,
+ enum xfs_healthmon_type type, unsigned int old_mask,
+ unsigned int new_mask);
+void xfs_healthmon_report_inode(struct xfs_inode *ip,
+ enum xfs_healthmon_type type, unsigned int old_mask,
+ unsigned int new_mask);
+
+void xfs_healthmon_report_shutdown(struct xfs_mount *mp, uint32_t flags);
+
+void xfs_healthmon_report_media(struct xfs_mount *mp, enum xfs_device fdev,
+ xfs_daddr_t daddr, uint64_t bbcount);
+
+void xfs_healthmon_report_file_ioerror(struct xfs_inode *ip,
+ const struct fserror_event *p);
+
+long xfs_ioc_health_monitor(struct file *file,
+ struct xfs_health_monitor __user *arg);
+
+#endif /* __XFS_HEALTHMON_H__ */
diff --git a/fs/xfs/xfs_hooks.c b/fs/xfs/xfs_hooks.c
index a58d1de2d37d..a09109e692b1 100644
--- a/fs/xfs/xfs_hooks.c
+++ b/fs/xfs/xfs_hooks.c
@@ -3,7 +3,7 @@
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 23a920437fe4..dbaab4ae709f 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index f83ec2bd0583..95b0eba242e9 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -3,7 +3,7 @@
* Copyright (c) 2008-2010, 2013 Dave Chinner
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -49,13 +49,11 @@ xfs_icreate_item_size(
STATIC void
xfs_icreate_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_icreate_item *icp = ICR_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICREATE,
- &icp->ic_format,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_ICREATE, &icp->ic_format,
sizeof(struct xfs_icreate_log));
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index f1f88e48fe22..50c0404f9064 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -5,7 +5,7 @@
*/
#include <linux/iversion.h>
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 2eb0c6011a2e..8913036b8024 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -336,8 +336,7 @@ STATIC void
xfs_inode_item_format_data_fork(
struct xfs_inode_log_item *iip,
struct xfs_inode_log_format *ilf,
- struct xfs_log_vec *lv,
- struct xfs_log_iovec **vecp)
+ struct xlog_format_buf *lfb)
{
struct xfs_inode *ip = iip->ili_inode;
size_t data_bytes;
@@ -354,9 +353,9 @@ xfs_inode_item_format_data_fork(
ASSERT(xfs_iext_count(&ip->i_df) > 0);
- p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
+ p = xlog_format_start(lfb, XLOG_REG_TYPE_IEXT);
data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
- xlog_finish_iovec(lv, *vecp, data_bytes);
+ xlog_format_commit(lfb, data_bytes);
ASSERT(data_bytes <= ip->i_df.if_bytes);
@@ -374,7 +373,7 @@ xfs_inode_item_format_data_fork(
if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
ip->i_df.if_broot_bytes > 0) {
ASSERT(ip->i_df.if_broot != NULL);
- xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_IBROOT,
ip->i_df.if_broot,
ip->i_df.if_broot_bytes);
ilf->ilf_dsize = ip->i_df.if_broot_bytes;
@@ -392,8 +391,9 @@ xfs_inode_item_format_data_fork(
ip->i_df.if_bytes > 0) {
ASSERT(ip->i_df.if_data != NULL);
ASSERT(ip->i_disk_size > 0);
- xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
- ip->i_df.if_data, ip->i_df.if_bytes);
+ xlog_format_copy(lfb, XLOG_REG_TYPE_ILOCAL,
+ ip->i_df.if_data,
+ ip->i_df.if_bytes);
ilf->ilf_dsize = (unsigned)ip->i_df.if_bytes;
ilf->ilf_size++;
} else {
@@ -416,8 +416,7 @@ STATIC void
xfs_inode_item_format_attr_fork(
struct xfs_inode_log_item *iip,
struct xfs_inode_log_format *ilf,
- struct xfs_log_vec *lv,
- struct xfs_log_iovec **vecp)
+ struct xlog_format_buf *lfb)
{
struct xfs_inode *ip = iip->ili_inode;
size_t data_bytes;
@@ -435,9 +434,9 @@ xfs_inode_item_format_attr_fork(
ASSERT(xfs_iext_count(&ip->i_af) ==
ip->i_af.if_nextents);
- p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT);
+ p = xlog_format_start(lfb, XLOG_REG_TYPE_IATTR_EXT);
data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK);
- xlog_finish_iovec(lv, *vecp, data_bytes);
+ xlog_format_commit(lfb, data_bytes);
ilf->ilf_asize = data_bytes;
ilf->ilf_size++;
@@ -453,7 +452,7 @@ xfs_inode_item_format_attr_fork(
ip->i_af.if_broot_bytes > 0) {
ASSERT(ip->i_af.if_broot != NULL);
- xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_IATTR_BROOT,
ip->i_af.if_broot,
ip->i_af.if_broot_bytes);
ilf->ilf_asize = ip->i_af.if_broot_bytes;
@@ -469,8 +468,9 @@ xfs_inode_item_format_attr_fork(
if ((iip->ili_fields & XFS_ILOG_ADATA) &&
ip->i_af.if_bytes > 0) {
ASSERT(ip->i_af.if_data != NULL);
- xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
- ip->i_af.if_data, ip->i_af.if_bytes);
+ xlog_format_copy(lfb, XLOG_REG_TYPE_IATTR_LOCAL,
+ ip->i_af.if_data,
+ ip->i_af.if_bytes);
ilf->ilf_asize = (unsigned)ip->i_af.if_bytes;
ilf->ilf_size++;
} else {
@@ -619,14 +619,13 @@ xfs_inode_to_log_dinode(
static void
xfs_inode_item_format_core(
struct xfs_inode *ip,
- struct xfs_log_vec *lv,
- struct xfs_log_iovec **vecp)
+ struct xlog_format_buf *lfb)
{
struct xfs_log_dinode *dic;
- dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE);
+ dic = xlog_format_start(lfb, XLOG_REG_TYPE_ICORE);
xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn);
- xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_mount));
+ xlog_format_commit(lfb, xfs_log_dinode_size(ip->i_mount));
}
/*
@@ -644,14 +643,13 @@ xfs_inode_item_format_core(
STATIC void
xfs_inode_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
- struct xfs_log_iovec *vecp = NULL;
struct xfs_inode_log_format *ilf;
- ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT);
+ ilf = xlog_format_start(lfb, XLOG_REG_TYPE_IFORMAT);
ilf->ilf_type = XFS_LI_INODE;
ilf->ilf_ino = ip->i_ino;
ilf->ilf_blkno = ip->i_imap.im_blkno;
@@ -668,13 +666,12 @@ xfs_inode_item_format(
ilf->ilf_asize = 0;
ilf->ilf_pad = 0;
memset(&ilf->ilf_u, 0, sizeof(ilf->ilf_u));
+ xlog_format_commit(lfb, sizeof(*ilf));
- xlog_finish_iovec(lv, vecp, sizeof(*ilf));
-
- xfs_inode_item_format_core(ip, lv, &vecp);
- xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
+ xfs_inode_item_format_core(ip, lfb);
+ xfs_inode_item_format_data_fork(iip, ilf, lfb);
if (xfs_inode_has_attr_fork(ip)) {
- xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);
+ xfs_inode_item_format_attr_fork(iip, ilf, lfb);
} else {
iip->ili_fields &=
~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index 9d1999d41be1..5d93228783eb 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 59eaad774371..4eeda4d4e3ab 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -41,6 +41,8 @@
#include "xfs_exchrange.h"
#include "xfs_handle.h"
#include "xfs_rtgroup.h"
+#include "xfs_healthmon.h"
+#include "xfs_verify_media.h"
#include <linux/mount.h>
#include <linux/fileattr.h>
@@ -1419,6 +1421,11 @@ xfs_file_ioctl(
case XFS_IOC_COMMIT_RANGE:
return xfs_ioc_commit_range(filp, arg);
+ case XFS_IOC_HEALTH_MONITOR:
+ return xfs_ioc_health_monitor(filp, arg);
+ case XFS_IOC_VERIFY_MEDIA:
+ return xfs_ioc_verify_media(filp, arg);
+
default:
return -ENOTTY;
}
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index b64785dc4354..c66e192448a8 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -5,7 +5,7 @@
*/
#include <linux/mount.h>
#include <linux/fsmap.h>
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 37a1b33e9045..be86d43044df 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -4,7 +4,7 @@
* Copyright (c) 2016-2018 Christoph Hellwig.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1cdd8a360510..208543e57eda 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 2aa37a4d2706..9faff287f747 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_iunlink_item.c b/fs/xfs/xfs_iunlink_item.c
index 1fd70a7aed63..a03a48eeb9a8 100644
--- a/fs/xfs/xfs_iunlink_item.c
+++ b/fs/xfs/xfs_iunlink_item.c
@@ -3,7 +3,7 @@
* Copyright (c) 2020-2022, Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index c1c31d1a8e21..ed4033006868 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -3,7 +3,7 @@
* Copyright (C) 2019 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index d4544ccafea5..a26378ca247d 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -22,6 +22,15 @@
#include "xfs_health.h"
#include "xfs_zone_alloc.h"
+struct xlog_write_data {
+ struct xlog_ticket *ticket;
+ struct xlog_in_core *iclog;
+ uint32_t bytes_left;
+ uint32_t record_cnt;
+ uint32_t data_cnt;
+ int log_offset;
+};
+
struct kmem_cache *xfs_log_ticket_cache;
/* Local miscellaneous function prototypes */
@@ -43,10 +52,7 @@ STATIC void xlog_state_do_callback(
STATIC int
xlog_state_get_iclog_space(
struct xlog *log,
- int len,
- struct xlog_in_core **iclog,
- struct xlog_ticket *ticket,
- int *logoffsetp);
+ struct xlog_write_data *data);
STATIC void
xlog_sync(
struct xlog *log,
@@ -74,62 +80,6 @@ xlog_iclogs_empty(
static int
xfs_log_cover(struct xfs_mount *);
-/*
- * We need to make sure the buffer pointer returned is naturally aligned for the
- * biggest basic data type we put into it. We have already accounted for this
- * padding when sizing the buffer.
- *
- * However, this padding does not get written into the log, and hence we have to
- * track the space used by the log vectors separately to prevent log space hangs
- * due to inaccurate accounting (i.e. a leak) of the used log space through the
- * CIL context ticket.
- *
- * We also add space for the xlog_op_header that describes this region in the
- * log. This prepends the data region we return to the caller to copy their data
- * into, so do all the static initialisation of the ophdr now. Because the ophdr
- * is not 8 byte aligned, we have to be careful to ensure that we align the
- * start of the buffer such that the region we return to the call is 8 byte
- * aligned and packed against the tail of the ophdr.
- */
-void *
-xlog_prepare_iovec(
- struct xfs_log_vec *lv,
- struct xfs_log_iovec **vecp,
- uint type)
-{
- struct xfs_log_iovec *vec = *vecp;
- struct xlog_op_header *oph;
- uint32_t len;
- void *buf;
-
- if (vec) {
- ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs);
- vec++;
- } else {
- vec = &lv->lv_iovecp[0];
- }
-
- len = lv->lv_buf_used + sizeof(struct xlog_op_header);
- if (!IS_ALIGNED(len, sizeof(uint64_t))) {
- lv->lv_buf_used = round_up(len, sizeof(uint64_t)) -
- sizeof(struct xlog_op_header);
- }
-
- vec->i_type = type;
- vec->i_addr = lv->lv_buf + lv->lv_buf_used;
-
- oph = vec->i_addr;
- oph->oh_clientid = XFS_TRANSACTION;
- oph->oh_res2 = 0;
- oph->oh_flags = 0;
-
- buf = vec->i_addr + sizeof(struct xlog_op_header);
- ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t)));
-
- *vecp = vec;
- return buf;
-}
-
static inline void
xlog_grant_sub_space(
struct xlog_grant_head *head,
@@ -848,6 +798,27 @@ xlog_wait_on_iclog(
return 0;
}
+int
+xlog_write_one_vec(
+ struct xlog *log,
+ struct xfs_cil_ctx *ctx,
+ struct xfs_log_iovec *reg,
+ struct xlog_ticket *ticket)
+{
+ struct xfs_log_vec lv = {
+ .lv_niovecs = 1,
+ .lv_iovecp = reg,
+ .lv_bytes = reg->i_len,
+ };
+ LIST_HEAD (lv_chain);
+
+ /* account for space used by record data */
+ ticket->t_curr_res -= lv.lv_bytes;
+
+ list_add(&lv.lv_list, &lv_chain);
+ return xlog_write(log, ctx, &lv_chain, ticket, lv.lv_bytes);
+}
+
/*
* Write out an unmount record using the ticket provided. We have to account for
* the data space used in the unmount ticket as this write is not done from a
@@ -876,21 +847,8 @@ xlog_write_unmount_record(
.i_len = sizeof(unmount_rec),
.i_type = XLOG_REG_TYPE_UNMOUNT,
};
- struct xfs_log_vec vec = {
- .lv_niovecs = 1,
- .lv_iovecp = &reg,
- };
- LIST_HEAD(lv_chain);
- list_add(&vec.lv_list, &lv_chain);
-
- BUILD_BUG_ON((sizeof(struct xlog_op_header) +
- sizeof(struct xfs_unmount_log_format)) !=
- sizeof(unmount_rec));
- /* account for space used by record data */
- ticket->t_curr_res -= sizeof(unmount_rec);
-
- return xlog_write(log, NULL, &lv_chain, ticket, reg.i_len);
+ return xlog_write_one_vec(log, NULL, &reg, ticket);
}
/*
@@ -1922,25 +1880,36 @@ xlog_print_trans(
}
}
+static inline uint32_t xlog_write_space_left(struct xlog_write_data *data)
+{
+ return data->iclog->ic_size - data->log_offset;
+}
+
+static void *
+xlog_write_space_advance(
+ struct xlog_write_data *data,
+ unsigned int len)
+{
+ void *p = data->iclog->ic_datap + data->log_offset;
+
+ ASSERT(xlog_write_space_left(data) >= len);
+ ASSERT(data->log_offset % sizeof(int32_t) == 0);
+ ASSERT(len % sizeof(int32_t) == 0);
+
+ data->data_cnt += len;
+ data->log_offset += len;
+ data->bytes_left -= len;
+ return p;
+}
+
static inline void
xlog_write_iovec(
- struct xlog_in_core *iclog,
- uint32_t *log_offset,
- void *data,
- uint32_t write_len,
- int *bytes_left,
- uint32_t *record_cnt,
- uint32_t *data_cnt)
+ struct xlog_write_data *data,
+ void *buf,
+ uint32_t buf_len)
{
- ASSERT(*log_offset < iclog->ic_log->l_iclog_size);
- ASSERT(*log_offset % sizeof(int32_t) == 0);
- ASSERT(write_len % sizeof(int32_t) == 0);
-
- memcpy(iclog->ic_datap + *log_offset, data, write_len);
- *log_offset += write_len;
- *bytes_left -= write_len;
- (*record_cnt)++;
- *data_cnt += write_len;
+ memcpy(xlog_write_space_advance(data, buf_len), buf, buf_len);
+ data->record_cnt++;
}
/*
@@ -1950,17 +1919,12 @@ xlog_write_iovec(
static void
xlog_write_full(
struct xfs_log_vec *lv,
- struct xlog_ticket *ticket,
- struct xlog_in_core *iclog,
- uint32_t *log_offset,
- uint32_t *len,
- uint32_t *record_cnt,
- uint32_t *data_cnt)
+ struct xlog_write_data *data)
{
int index;
- ASSERT(*log_offset + *len <= iclog->ic_size ||
- iclog->ic_state == XLOG_STATE_WANT_SYNC);
+ ASSERT(data->bytes_left <= xlog_write_space_left(data) ||
+ data->iclog->ic_state == XLOG_STATE_WANT_SYNC);
/*
* Ordered log vectors have no regions to write so this
@@ -1970,40 +1934,32 @@ xlog_write_full(
struct xfs_log_iovec *reg = &lv->lv_iovecp[index];
struct xlog_op_header *ophdr = reg->i_addr;
- ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
- xlog_write_iovec(iclog, log_offset, reg->i_addr,
- reg->i_len, len, record_cnt, data_cnt);
+ ophdr->oh_tid = cpu_to_be32(data->ticket->t_tid);
+ xlog_write_iovec(data, reg->i_addr, reg->i_len);
}
}
static int
xlog_write_get_more_iclog_space(
- struct xlog_ticket *ticket,
- struct xlog_in_core **iclogp,
- uint32_t *log_offset,
- uint32_t len,
- uint32_t *record_cnt,
- uint32_t *data_cnt)
+ struct xlog_write_data *data)
{
- struct xlog_in_core *iclog = *iclogp;
- struct xlog *log = iclog->ic_log;
+ struct xlog *log = data->iclog->ic_log;
int error;
spin_lock(&log->l_icloglock);
- ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC);
- xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
- error = xlog_state_release_iclog(log, iclog, ticket);
+ ASSERT(data->iclog->ic_state == XLOG_STATE_WANT_SYNC);
+ xlog_state_finish_copy(log, data->iclog, data->record_cnt,
+ data->data_cnt);
+ error = xlog_state_release_iclog(log, data->iclog, data->ticket);
spin_unlock(&log->l_icloglock);
if (error)
return error;
- error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
- log_offset);
+ error = xlog_state_get_iclog_space(log, data);
if (error)
return error;
- *record_cnt = 0;
- *data_cnt = 0;
- *iclogp = iclog;
+ data->record_cnt = 0;
+ data->data_cnt = 0;
return 0;
}
@@ -2016,14 +1972,8 @@ xlog_write_get_more_iclog_space(
static int
xlog_write_partial(
struct xfs_log_vec *lv,
- struct xlog_ticket *ticket,
- struct xlog_in_core **iclogp,
- uint32_t *log_offset,
- uint32_t *len,
- uint32_t *record_cnt,
- uint32_t *data_cnt)
+ struct xlog_write_data *data)
{
- struct xlog_in_core *iclog = *iclogp;
struct xlog_op_header *ophdr;
int index = 0;
uint32_t rlen;
@@ -2045,25 +1995,22 @@ xlog_write_partial(
* Hence if there isn't space for region data after the
* opheader, then we need to start afresh with a new iclog.
*/
- if (iclog->ic_size - *log_offset <=
+ if (xlog_write_space_left(data) <=
sizeof(struct xlog_op_header)) {
- error = xlog_write_get_more_iclog_space(ticket,
- &iclog, log_offset, *len, record_cnt,
- data_cnt);
+ error = xlog_write_get_more_iclog_space(data);
if (error)
return error;
}
ophdr = reg->i_addr;
- rlen = min_t(uint32_t, reg->i_len, iclog->ic_size - *log_offset);
+ rlen = min_t(uint32_t, reg->i_len, xlog_write_space_left(data));
- ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
+ ophdr->oh_tid = cpu_to_be32(data->ticket->t_tid);
ophdr->oh_len = cpu_to_be32(rlen - sizeof(struct xlog_op_header));
if (rlen != reg->i_len)
ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
- xlog_write_iovec(iclog, log_offset, reg->i_addr,
- rlen, len, record_cnt, data_cnt);
+ xlog_write_iovec(data, reg->i_addr, rlen);
/* If we wrote the whole region, move to the next. */
if (rlen == reg->i_len)
@@ -2098,22 +2045,20 @@ xlog_write_partial(
* consumes hasn't been accounted to the lv we are
* writing.
*/
- error = xlog_write_get_more_iclog_space(ticket,
- &iclog, log_offset,
- *len + sizeof(struct xlog_op_header),
- record_cnt, data_cnt);
+ data->bytes_left += sizeof(struct xlog_op_header);
+ error = xlog_write_get_more_iclog_space(data);
if (error)
return error;
- ophdr = iclog->ic_datap + *log_offset;
- ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
+ ophdr = xlog_write_space_advance(data,
+ sizeof(struct xlog_op_header));
+ ophdr->oh_tid = cpu_to_be32(data->ticket->t_tid);
ophdr->oh_clientid = XFS_TRANSACTION;
ophdr->oh_res2 = 0;
ophdr->oh_flags = XLOG_WAS_CONT_TRANS;
- ticket->t_curr_res -= sizeof(struct xlog_op_header);
- *log_offset += sizeof(struct xlog_op_header);
- *data_cnt += sizeof(struct xlog_op_header);
+ data->ticket->t_curr_res -=
+ sizeof(struct xlog_op_header);
/*
* If rlen fits in the iclog, then end the region
@@ -2121,26 +2066,19 @@ xlog_write_partial(
*/
reg_offset += rlen;
rlen = reg->i_len - reg_offset;
- if (rlen <= iclog->ic_size - *log_offset)
+ if (rlen <= xlog_write_space_left(data))
ophdr->oh_flags |= XLOG_END_TRANS;
else
ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
- rlen = min_t(uint32_t, rlen, iclog->ic_size - *log_offset);
+ rlen = min_t(uint32_t, rlen,
+ xlog_write_space_left(data));
ophdr->oh_len = cpu_to_be32(rlen);
- xlog_write_iovec(iclog, log_offset,
- reg->i_addr + reg_offset,
- rlen, len, record_cnt, data_cnt);
-
+ xlog_write_iovec(data, reg->i_addr + reg_offset, rlen);
} while (ophdr->oh_flags & XLOG_CONTINUE_TRANS);
}
- /*
- * No more iovecs remain in this logvec so return the next log vec to
- * the caller so it can go back to fast path copying.
- */
- *iclogp = iclog;
return 0;
}
@@ -2193,12 +2131,12 @@ xlog_write(
uint32_t len)
{
- struct xlog_in_core *iclog = NULL;
struct xfs_log_vec *lv;
- uint32_t record_cnt = 0;
- uint32_t data_cnt = 0;
- int error = 0;
- int log_offset;
+ struct xlog_write_data data = {
+ .ticket = ticket,
+ .bytes_left = len,
+ };
+ int error;
if (ticket->t_curr_res < 0) {
xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
@@ -2207,12 +2145,11 @@ xlog_write(
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
}
- error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
- &log_offset);
+ error = xlog_state_get_iclog_space(log, &data);
if (error)
return error;
- ASSERT(log_offset <= iclog->ic_size - 1);
+ ASSERT(xlog_write_space_left(&data) > 0);
/*
* If we have a context pointer, pass it the first iclog we are
@@ -2220,7 +2157,7 @@ xlog_write(
* ordering.
*/
if (ctx)
- xlog_cil_set_ctx_write_state(ctx, iclog);
+ xlog_cil_set_ctx_write_state(ctx, data.iclog);
list_for_each_entry(lv, lv_chain, lv_list) {
/*
@@ -2228,10 +2165,8 @@ xlog_write(
* the partial copy loop which can handle this case.
*/
if (lv->lv_niovecs &&
- lv->lv_bytes > iclog->ic_size - log_offset) {
- error = xlog_write_partial(lv, ticket, &iclog,
- &log_offset, &len, &record_cnt,
- &data_cnt);
+ lv->lv_bytes > xlog_write_space_left(&data)) {
+ error = xlog_write_partial(lv, &data);
if (error) {
/*
* We have no iclog to release, so just return
@@ -2240,11 +2175,10 @@ xlog_write(
return error;
}
} else {
- xlog_write_full(lv, ticket, iclog, &log_offset,
- &len, &record_cnt, &data_cnt);
+ xlog_write_full(lv, &data);
}
}
- ASSERT(len == 0);
+ ASSERT(data.bytes_left == 0);
/*
* We've already been guaranteed that the last writes will fit inside
@@ -2253,8 +2187,8 @@ xlog_write(
* iclog with the number of bytes written here.
*/
spin_lock(&log->l_icloglock);
- xlog_state_finish_copy(log, iclog, record_cnt, 0);
- error = xlog_state_release_iclog(log, iclog, ticket);
+ xlog_state_finish_copy(log, data.iclog, data.record_cnt, 0);
+ error = xlog_state_release_iclog(log, data.iclog, ticket);
spin_unlock(&log->l_icloglock);
return error;
@@ -2576,10 +2510,7 @@ xlog_state_done_syncing(
STATIC int
xlog_state_get_iclog_space(
struct xlog *log,
- int len,
- struct xlog_in_core **iclogp,
- struct xlog_ticket *ticket,
- int *logoffsetp)
+ struct xlog_write_data *data)
{
int log_offset;
struct xlog_rec_header *head;
@@ -2614,7 +2545,7 @@ restart:
* must be written.
*/
if (log_offset == 0) {
- ticket->t_curr_res -= log->l_iclog_hsize;
+ data->ticket->t_curr_res -= log->l_iclog_hsize;
head->h_cycle = cpu_to_be32(log->l_curr_cycle);
head->h_lsn = cpu_to_be64(
xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block));
@@ -2644,7 +2575,8 @@ restart:
* reference to the iclog.
*/
if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
- error = xlog_state_release_iclog(log, iclog, ticket);
+ error = xlog_state_release_iclog(log, iclog,
+ data->ticket);
spin_unlock(&log->l_icloglock);
if (error)
return error;
@@ -2657,16 +2589,16 @@ restart:
* iclogs (to mark it taken), this particular iclog will release/sync
* to disk in xlog_write().
*/
- if (len <= iclog->ic_size - iclog->ic_offset)
- iclog->ic_offset += len;
+ if (data->bytes_left <= iclog->ic_size - iclog->ic_offset)
+ iclog->ic_offset += data->bytes_left;
else
xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
- *iclogp = iclog;
+ data->iclog = iclog;
ASSERT(iclog->ic_offset <= iclog->ic_size);
spin_unlock(&log->l_icloglock);
- *logoffsetp = log_offset;
+ data->log_offset = log_offset;
return 0;
}
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index dcc1f44ed68f..0f23812b0b31 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -6,20 +6,9 @@
#ifndef __XFS_LOG_H__
#define __XFS_LOG_H__
+struct xlog_format_buf;
struct xfs_cil_ctx;
-struct xfs_log_vec {
- struct list_head lv_list; /* CIL lv chain ptrs */
- uint32_t lv_order_id; /* chain ordering info */
- int lv_niovecs; /* number of iovecs in lv */
- struct xfs_log_iovec *lv_iovecp; /* iovec array */
- struct xfs_log_item *lv_item; /* owner */
- char *lv_buf; /* formatted buffer */
- int lv_bytes; /* accounted space in buffer */
- int lv_buf_used; /* buffer space used so far */
- int lv_alloc_size; /* size of allocated lv */
-};
-
/* Region types for iovec's i_type */
#define XLOG_REG_TYPE_BFORMAT 1
#define XLOG_REG_TYPE_BCHUNK 2
@@ -70,58 +59,24 @@ xlog_calc_iovec_len(int len)
return roundup(len, sizeof(uint32_t));
}
-void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
- uint type);
-
-static inline void
-xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec,
- int data_len)
-{
- struct xlog_op_header *oph = vec->i_addr;
- int len;
-
- /*
- * Always round up the length to the correct alignment so callers don't
- * need to know anything about this log vec layout requirement. This
- * means we have to zero the area the data to be written does not cover.
- * This is complicated by fact the payload region is offset into the
- * logvec region by the opheader that tracks the payload.
- */
- len = xlog_calc_iovec_len(data_len);
- if (len - data_len != 0) {
- char *buf = vec->i_addr + sizeof(struct xlog_op_header);
-
- memset(buf + data_len, 0, len - data_len);
- }
-
- /*
- * The opheader tracks aligned payload length, whilst the logvec tracks
- * the overall region length.
- */
- oph->oh_len = cpu_to_be32(len);
-
- len += sizeof(struct xlog_op_header);
- lv->lv_buf_used += len;
- lv->lv_bytes += len;
- vec->i_len = len;
-
- /* Catch buffer overruns */
- ASSERT((void *)lv->lv_buf + lv->lv_bytes <=
- (void *)lv + lv->lv_alloc_size);
-}
+void *xlog_format_start(struct xlog_format_buf *lfb, uint16_t type);
+void xlog_format_commit(struct xlog_format_buf *lfb, unsigned int data_len);
/*
* Copy the amount of data requested by the caller into a new log iovec.
*/
static inline void *
-xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
- uint type, void *data, int len)
+xlog_format_copy(
+ struct xlog_format_buf *lfb,
+ uint16_t type,
+ void *data,
+ unsigned int len)
{
void *buf;
- buf = xlog_prepare_iovec(lv, vecp, type);
+ buf = xlog_format_start(lfb, type);
memcpy(buf, data, len);
- xlog_finish_iovec(lv, *vecp, len);
+ xlog_format_commit(lfb, len);
return buf;
}
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 778ac47adb8c..566976b8fef3 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -3,7 +3,7 @@
* Copyright (c) 2010 Red Hat, Inc. All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
@@ -409,6 +409,102 @@ xfs_cil_prepare_item(
lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
}
+struct xlog_format_buf {
+ struct xfs_log_vec *lv;
+ unsigned int idx;
+};
+
+/*
+ * We need to make sure the buffer pointer returned is naturally aligned for the
+ * biggest basic data type we put into it. We have already accounted for this
+ * padding when sizing the buffer.
+ *
+ * However, this padding does not get written into the log, and hence we have to
+ * track the space used by the log vectors separately to prevent log space hangs
+ * due to inaccurate accounting (i.e. a leak) of the used log space through the
+ * CIL context ticket.
+ *
+ * We also add space for the xlog_op_header that describes this region in the
+ * log. This prepends the data region we return to the caller to copy their data
+ * into, so do all the static initialisation of the ophdr now. Because the ophdr
+ * is not 8 byte aligned, we have to be careful to ensure that we align the
+ * start of the buffer such that the region we return to the call is 8 byte
+ * aligned and packed against the tail of the ophdr.
+ */
+void *
+xlog_format_start(
+ struct xlog_format_buf *lfb,
+ uint16_t type)
+{
+ struct xfs_log_vec *lv = lfb->lv;
+ struct xfs_log_iovec *vec = &lv->lv_iovecp[lfb->idx];
+ struct xlog_op_header *oph;
+ uint32_t len;
+ void *buf;
+
+ ASSERT(lfb->idx < lv->lv_niovecs);
+
+ len = lv->lv_buf_used + sizeof(struct xlog_op_header);
+ if (!IS_ALIGNED(len, sizeof(uint64_t))) {
+ lv->lv_buf_used = round_up(len, sizeof(uint64_t)) -
+ sizeof(struct xlog_op_header);
+ }
+
+ vec->i_type = type;
+ vec->i_addr = lv->lv_buf + lv->lv_buf_used;
+
+ oph = vec->i_addr;
+ oph->oh_clientid = XFS_TRANSACTION;
+ oph->oh_res2 = 0;
+ oph->oh_flags = 0;
+
+ buf = vec->i_addr + sizeof(struct xlog_op_header);
+ ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t)));
+ return buf;
+}
+
+void
+xlog_format_commit(
+ struct xlog_format_buf *lfb,
+ unsigned int data_len)
+{
+ struct xfs_log_vec *lv = lfb->lv;
+ struct xfs_log_iovec *vec = &lv->lv_iovecp[lfb->idx];
+ struct xlog_op_header *oph = vec->i_addr;
+ int len;
+
+ /*
+ * Always round up the length to the correct alignment so callers don't
+ * need to know anything about this log vec layout requirement. This
+ * means we have to zero the area the data to be written does not cover.
+ * This is complicated by fact the payload region is offset into the
+ * logvec region by the opheader that tracks the payload.
+ */
+ len = xlog_calc_iovec_len(data_len);
+ if (len - data_len != 0) {
+ char *buf = vec->i_addr + sizeof(struct xlog_op_header);
+
+ memset(buf + data_len, 0, len - data_len);
+ }
+
+ /*
+ * The opheader tracks aligned payload length, whilst the logvec tracks
+ * the overall region length.
+ */
+ oph->oh_len = cpu_to_be32(len);
+
+ len += sizeof(struct xlog_op_header);
+ lv->lv_buf_used += len;
+ lv->lv_bytes += len;
+ vec->i_len = len;
+
+ /* Catch buffer overruns */
+ ASSERT((void *)lv->lv_buf + lv->lv_bytes <=
+ (void *)lv + lv->lv_alloc_size);
+
+ lfb->idx++;
+}
+
/*
* Format log item into a flat buffers
*
@@ -454,6 +550,7 @@ xlog_cil_insert_format_items(
list_for_each_entry(lip, &tp->t_items, li_trans) {
struct xfs_log_vec *lv = lip->li_lv;
struct xfs_log_vec *shadow = lip->li_lv_shadow;
+ struct xlog_format_buf lfb = { };
/* Skip items which aren't dirty in this transaction. */
if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
@@ -501,8 +598,9 @@ xlog_cil_insert_format_items(
lv->lv_item = lip;
}
+ lfb.lv = lv;
ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
- lip->li_ops->iop_format(lip, lv);
+ lip->li_ops->iop_format(lip, &lfb);
xfs_cil_prepare_item(log, lip, lv, diff_len);
}
}
@@ -1098,13 +1196,7 @@ xlog_cil_write_commit_record(
.i_len = sizeof(struct xlog_op_header),
.i_type = XLOG_REG_TYPE_COMMIT,
};
- struct xfs_log_vec vec = {
- .lv_niovecs = 1,
- .lv_iovecp = &reg,
- };
int error;
- LIST_HEAD(lv_chain);
- list_add(&vec.lv_list, &lv_chain);
if (xlog_is_shutdown(log))
return -EIO;
@@ -1112,10 +1204,7 @@ xlog_cil_write_commit_record(
error = xlog_cil_order_write(ctx->cil, ctx->sequence, _COMMIT_RECORD);
if (error)
return error;
-
- /* account for space used by record data */
- ctx->ticket->t_curr_res -= reg.i_len;
- error = xlog_write(log, ctx, &lv_chain, ctx->ticket, reg.i_len);
+ error = xlog_write_one_vec(log, ctx, &reg, ctx->ticket);
if (error)
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
return error;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 0fe59f0525aa..cf1e4ce61a8c 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -13,6 +13,24 @@ struct xlog;
struct xlog_ticket;
struct xfs_mount;
+struct xfs_log_iovec {
+ void *i_addr;/* beginning address of region */
+ int i_len; /* length in bytes of region */
+ uint i_type; /* type of region */
+};
+
+struct xfs_log_vec {
+ struct list_head lv_list; /* CIL lv chain ptrs */
+ uint32_t lv_order_id; /* chain ordering info */
+ int lv_niovecs; /* number of iovecs in lv */
+ struct xfs_log_iovec *lv_iovecp; /* iovec array */
+ struct xfs_log_item *lv_item; /* owner */
+ char *lv_buf; /* formatted buffer */
+ int lv_bytes; /* accounted space in buffer */
+ int lv_buf_used; /* buffer space used so far */
+ int lv_alloc_size; /* size of allocated lv */
+};
+
/*
* get client id from packed copy.
*
@@ -507,6 +525,8 @@ void xlog_print_trans(struct xfs_trans *);
int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx,
struct list_head *lv_chain, struct xlog_ticket *tic,
uint32_t len);
+int xlog_write_one_vec(struct xlog *log, struct xfs_cil_ctx *ctx,
+ struct xfs_log_iovec *reg, struct xlog_ticket *ticket);
void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 03e42c7dab56..935905743f94 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -2953,18 +2953,23 @@ xlog_valid_rec_header(
xfs_daddr_t blkno,
int bufsize)
{
+ struct xfs_mount *mp = log->l_mp;
+ u32 h_version = be32_to_cpu(rhead->h_version);
int hlen;
- if (XFS_IS_CORRUPT(log->l_mp,
+ if (XFS_IS_CORRUPT(mp,
rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM)))
return -EFSCORRUPTED;
- if (XFS_IS_CORRUPT(log->l_mp,
- (!rhead->h_version ||
- (be32_to_cpu(rhead->h_version) &
- (~XLOG_VERSION_OKBITS))))) {
- xfs_warn(log->l_mp, "%s: unrecognised log version (%d).",
- __func__, be32_to_cpu(rhead->h_version));
- return -EFSCORRUPTED;
+
+ /*
+ * The log version must match the superblock
+ */
+ if (xfs_has_logv2(mp)) {
+ if (XFS_IS_CORRUPT(mp, h_version != XLOG_VERSION_2))
+ return -EFSCORRUPTED;
+ } else {
+ if (XFS_IS_CORRUPT(mp, h_version != XLOG_VERSION_1))
+ return -EFSCORRUPTED;
}
/*
@@ -2972,12 +2977,12 @@ xlog_valid_rec_header(
* and h_len must not be greater than LR buffer size.
*/
hlen = be32_to_cpu(rhead->h_len);
- if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > bufsize))
+ if (XFS_IS_CORRUPT(mp, hlen <= 0 || hlen > bufsize))
return -EFSCORRUPTED;
- if (XFS_IS_CORRUPT(log->l_mp,
- blkno > log->l_logBBsize || blkno > INT_MAX))
+ if (XFS_IS_CORRUPT(mp, blkno > log->l_logBBsize || blkno > INT_MAX))
return -EFSCORRUPTED;
+
return 0;
}
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 19aba2c3d525..fd297082aeb8 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -3,7 +3,7 @@
* Copyright (c) 2011 Red Hat, Inc. All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_error.h"
#include "xfs_shared.h"
@@ -149,14 +149,6 @@ xfs_warn_experimental(
.opstate = XFS_OPSTATE_WARNED_LARP,
.name = "logged extended attributes",
},
- [XFS_EXPERIMENTAL_LBS] = {
- .opstate = XFS_OPSTATE_WARNED_LBS,
- .name = "large block size",
- },
- [XFS_EXPERIMENTAL_METADIR] = {
- .opstate = XFS_OPSTATE_WARNED_METADIR,
- .name = "metadata directory tree",
- },
[XFS_EXPERIMENTAL_ZONED] = {
.opstate = XFS_OPSTATE_WARNED_ZONED,
.name = "zoned RT device",
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index d68e72379f9d..49b0ef40d299 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -93,8 +93,6 @@ void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg,
enum xfs_experimental_feat {
XFS_EXPERIMENTAL_SHRINK,
XFS_EXPERIMENTAL_LARP,
- XFS_EXPERIMENTAL_LBS,
- XFS_EXPERIMENTAL_METADIR,
XFS_EXPERIMENTAL_ZONED,
XFS_EXPERIMENTAL_MAX,
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 0953f6ae94ab..9c295abd0a0a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -41,6 +41,7 @@
#include "xfs_rtrefcount_btree.h"
#include "scrub/stats.h"
#include "xfs_zone_alloc.h"
+#include "xfs_healthmon.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
@@ -625,6 +626,7 @@ xfs_unmount_flush_inodes(
cancel_delayed_work_sync(&mp->m_reclaim_work);
xfs_reclaim_inodes(mp);
xfs_health_unmount(mp);
+ xfs_healthmon_unmount(mp);
}
static void
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index b871dfde372b..61c71128d171 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -13,6 +13,7 @@ struct xfs_ail;
struct xfs_quotainfo;
struct xfs_da_geometry;
struct xfs_perag;
+struct xfs_healthmon;
/* dynamic preallocation free space thresholds, 5% down to 1% */
enum {
@@ -342,6 +343,9 @@ typedef struct xfs_mount {
/* Hook to feed dirent updates to an active online repair. */
struct xfs_hooks m_dir_update_hooks;
+
+ /* Private data referring to a health monitor object. */
+ struct xfs_healthmon *m_healthmon;
} xfs_mount_t;
#define M_IGEO(mp) (&(mp)->m_ino_geo)
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 73b7e72944e4..4e417747688f 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -3,7 +3,7 @@
* Copyright (c) 2006-2007 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_mru_cache.h"
/*
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index 6d5002413c2c..6be19fa1ebe2 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -3,7 +3,7 @@
* Copyright (c) 2022 Fujitsu. All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
@@ -22,6 +22,7 @@
#include "xfs_notify_failure.h"
#include "xfs_rtgroup.h"
#include "xfs_rtrmap_btree.h"
+#include "xfs_healthmon.h"
#include <linux/mm.h>
#include <linux/dax.h>
@@ -219,6 +220,8 @@ xfs_dax_notify_logdev_failure(
if (error)
return error;
+ xfs_healthmon_report_media(mp, XFS_DEV_LOG, daddr, bblen);
+
/*
* In the pre-remove case the failure notification is attempting to
* trigger a force unmount. The expectation is that the device is
@@ -252,16 +255,20 @@ xfs_dax_notify_dev_failure(
uint64_t bblen;
struct xfs_group *xg = NULL;
- if (!xfs_has_rmapbt(mp)) {
- xfs_debug(mp, "notify_failure() needs rmapbt enabled!");
- return -EOPNOTSUPP;
- }
-
error = xfs_dax_translate_range(xfs_group_type_buftarg(mp, type),
offset, len, &daddr, &bblen);
if (error)
return error;
+ xfs_healthmon_report_media(mp,
+ type == XG_TYPE_RTG ? XFS_DEV_RT : XFS_DEV_DATA,
+ daddr, bblen);
+
+ if (!xfs_has_rmapbt(mp)) {
+ xfs_debug(mp, "notify_failure() needs rmapbt enabled!");
+ return -EOPNOTSUPP;
+ }
+
if (type == XG_TYPE_RTG) {
start_bno = xfs_daddr_to_rtb(mp, daddr);
end_bno = xfs_daddr_to_rtb(mp, daddr + bblen - 1);
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_platform.h
index 55064228c4d5..1e59bf94d1f2 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_platform.h
@@ -3,24 +3,11 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#ifndef __XFS_LINUX__
-#define __XFS_LINUX__
+#ifndef _XFS_PLATFORM_H
+#define _XFS_PLATFORM_H
#include <linux/types.h>
#include <linux/uuid.h>
-
-/*
- * Kernel specific type declarations for XFS
- */
-
-typedef __s64 xfs_off_t; /* <file offset> type */
-typedef unsigned long long xfs_ino_t; /* <inode> type */
-typedef __s64 xfs_daddr_t; /* <disk address> type */
-typedef __u32 xfs_dev_t;
-typedef __u32 xfs_nlink_t;
-
-#include "xfs_types.h"
-
#include <linux/semaphore.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
@@ -63,7 +50,6 @@ typedef __u32 xfs_nlink_t;
#include <linux/xattr.h>
#include <linux/mnt_idmapping.h>
#include <linux/debugfs.h>
-
#include <asm/page.h>
#include <asm/div64.h>
#include <asm/param.h>
@@ -71,6 +57,32 @@ typedef __u32 xfs_nlink_t;
#include <asm/byteorder.h>
#include <linux/unaligned.h>
+#ifdef CONFIG_XFS_DEBUG
+#define DEBUG 1
+#endif
+
+#ifdef CONFIG_XFS_DEBUG_EXPENSIVE
+#define DEBUG_EXPENSIVE 1
+#endif
+
+#ifdef CONFIG_XFS_ASSERT_FATAL
+#define XFS_ASSERT_FATAL 1
+#endif
+
+#ifdef CONFIG_XFS_WARN
+#define XFS_WARN 1
+#endif
+
+/*
+ * Kernel specific type declarations for XFS
+ */
+typedef __s64 xfs_off_t; /* <file offset> type */
+typedef unsigned long long xfs_ino_t; /* <inode> type */
+typedef __s64 xfs_daddr_t; /* <disk address> type */
+typedef __u32 xfs_dev_t;
+typedef __u32 xfs_nlink_t;
+
+#include "xfs_types.h"
#include "xfs_fs.h"
#include "xfs_stats.h"
#include "xfs_sysctl.h"
@@ -279,4 +291,4 @@ kmem_to_page(void *addr)
return virt_to_page(addr);
}
-#endif /* __XFS_LINUX__ */
+#endif /* _XFS_PLATFORM_H */
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index afe7497012d4..221e55887a2a 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -2,7 +2,7 @@
/*
* Copyright (c) 2014 Christoph Hellwig.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c
index c283b801cc5d..7c79ab0db0e2 100644
--- a/fs/xfs/xfs_pwork.c
+++ b/fs/xfs/xfs_pwork.c
@@ -3,7 +3,7 @@
* Copyright (C) 2019 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 95be67ac6eb4..a3e7d4a107d4 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index edc0aef3cf34..a094b8252ffd 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 022e2179c06b..d50b7318cb5c 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -5,7 +5,7 @@
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 94fbe3d99ec7..8804508cc2b8 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -3,7 +3,7 @@
* Copyright (c) 2008, Christoph Hellwig
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 3728234699a2..881c3f3a6a24 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -3,7 +3,7 @@
* Copyright (C) 2016 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
@@ -93,10 +93,9 @@ unsigned int xfs_cui_log_space(unsigned int nr)
STATIC void
xfs_cui_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
ASSERT(atomic_read(&cuip->cui_next_extent) ==
cuip->cui_format.cui_nextents);
@@ -105,7 +104,7 @@ xfs_cui_item_format(
cuip->cui_format.cui_type = lip->li_type;
cuip->cui_format.cui_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format,
xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents));
}
@@ -199,17 +198,16 @@ unsigned int xfs_cud_log_space(void)
STATIC void
xfs_cud_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
ASSERT(lip->li_type == XFS_LI_CUD || lip->li_type == XFS_LI_CUD_RT);
cudp->cud_format.cud_type = lip->li_type;
cudp->cud_format.cud_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format,
sizeof(struct xfs_cud_log_format));
}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 3f177b4ec131..db23a0f231d6 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -3,7 +3,7 @@
* Copyright (C) 2016 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 15f0903f6fd4..a39fe08dcd8f 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -3,7 +3,7 @@
* Copyright (C) 2016 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
@@ -92,10 +92,9 @@ unsigned int xfs_rui_log_space(unsigned int nr)
STATIC void
xfs_rui_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
ASSERT(atomic_read(&ruip->rui_next_extent) ==
ruip->rui_format.rui_nextents);
@@ -105,7 +104,7 @@ xfs_rui_item_format(
ruip->rui_format.rui_type = lip->li_type;
ruip->rui_format.rui_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format,
xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents));
}
@@ -200,17 +199,16 @@ unsigned int xfs_rud_log_space(void)
STATIC void
xfs_rud_item_format(
struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
+ struct xlog_format_buf *lfb)
{
struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
ASSERT(lip->li_type == XFS_LI_RUD || lip->li_type == XFS_LI_RUD_RT);
rudp->rud_format.rud_type = lip->li_type;
rudp->rud_format.rud_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format,
+ xlog_format_copy(lfb, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format,
sizeof(struct xfs_rud_log_format));
}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a12ffed12391..90a94a5b6f7e 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 35c7fb3ba324..017db0361cd8 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
struct xstats xfsstats;
@@ -23,7 +23,8 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
uint64_t xs_xstrat_bytes = 0;
uint64_t xs_write_bytes = 0;
uint64_t xs_read_bytes = 0;
- uint64_t defer_relog = 0;
+ uint64_t xs_defer_relog = 0;
+ uint64_t xs_gc_bytes = 0;
static const struct xstats_entry {
char *desc;
@@ -57,7 +58,8 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
{ "rtrmapbt_mem", xfsstats_offset(xs_rtrefcbt_2) },
{ "rtrefcntbt", xfsstats_offset(xs_qm_dqreclaims)},
/* we print both series of quota information together */
- { "qm", xfsstats_offset(xs_xstrat_bytes)},
+ { "qm", xfsstats_offset(xs_gc_read_calls)},
+ { "zoned", xfsstats_offset(__pad1)},
};
/* Loop over all stats groups */
@@ -76,19 +78,21 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
xs_xstrat_bytes += per_cpu_ptr(stats, i)->s.xs_xstrat_bytes;
xs_write_bytes += per_cpu_ptr(stats, i)->s.xs_write_bytes;
xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes;
- defer_relog += per_cpu_ptr(stats, i)->s.defer_relog;
+ xs_defer_relog += per_cpu_ptr(stats, i)->s.xs_defer_relog;
+ xs_gc_bytes += per_cpu_ptr(stats, i)->s.xs_gc_bytes;
}
len += scnprintf(buf + len, PATH_MAX-len, "xpc %llu %llu %llu\n",
xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
len += scnprintf(buf + len, PATH_MAX-len, "defer_relog %llu\n",
- defer_relog);
+ xs_defer_relog);
len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n",
#if defined(DEBUG)
1);
#else
0);
#endif
+ len += scnprintf(buf + len, PATH_MAX-len, "gc xpc %llu\n", xs_gc_bytes);
return len;
}
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 15ba1abcf253..153d2381d0a8 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -138,11 +138,17 @@ struct __xfsstats {
uint32_t xs_qm_dqwants;
uint32_t xs_qm_dquot;
uint32_t xs_qm_dquot_unused;
+/* Zone GC counters */
+ uint32_t xs_gc_read_calls;
+ uint32_t xs_gc_write_calls;
+ uint32_t xs_gc_zone_reset_calls;
+ uint32_t __pad1;
/* Extra precision counters */
uint64_t xs_xstrat_bytes;
uint64_t xs_write_bytes;
uint64_t xs_read_bytes;
- uint64_t defer_relog;
+ uint64_t xs_defer_relog;
+ uint64_t xs_gc_bytes;
};
#define xfsstats_offset(f) (offsetof(struct __xfsstats, f)/sizeof(uint32_t))
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 094f257eff15..76867eb3f975 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -4,7 +4,7 @@
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
@@ -40,6 +40,8 @@
#include "xfs_defer.h"
#include "xfs_attr_item.h"
#include "xfs_xattr.h"
+#include "xfs_error.h"
+#include "xfs_errortag.h"
#include "xfs_iunlink_item.h"
#include "xfs_dahash_test.h"
#include "xfs_rtbitmap.h"
@@ -47,12 +49,14 @@
#include "xfs_parent.h"
#include "xfs_rtalloc.h"
#include "xfs_zone_alloc.h"
+#include "xfs_healthmon.h"
#include "scrub/stats.h"
#include "scrub/rcbag_btree.h"
#include <linux/magic.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
+#include <linux/fserror.h>
static const struct super_operations xfs_super_operations;
@@ -111,7 +115,7 @@ enum {
Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones,
- Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write,
+ Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write, Opt_errortag,
};
#define fsparam_dead(NAME) \
@@ -170,6 +174,7 @@ static const struct fs_parameter_spec xfs_fs_parameters[] = {
fsparam_flag("lifetime", Opt_lifetime),
fsparam_flag("nolifetime", Opt_nolifetime),
fsparam_string("max_atomic_write", Opt_max_atomic_write),
+ fsparam_string("errortag", Opt_errortag),
{}
};
@@ -794,6 +799,9 @@ xfs_mount_free(
debugfs_remove(mp->m_debugfs);
kfree(mp->m_rtname);
kfree(mp->m_logname);
+#ifdef DEBUG
+ kfree(mp->m_errortag);
+#endif
kfree(mp);
}
@@ -1273,6 +1281,15 @@ xfs_fs_show_stats(
return 0;
}
+static void
+xfs_fs_report_error(
+ const struct fserror_event *event)
+{
+ /* healthmon already knows about non-inode and metadata errors */
+ if (event->inode && event->type != FSERR_METADATA)
+ xfs_healthmon_report_file_ioerror(XFS_I(event->inode), event);
+}
+
static const struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
@@ -1288,6 +1305,7 @@ static const struct super_operations xfs_super_operations = {
.free_cached_objects = xfs_fs_free_cached_objects,
.shutdown = xfs_fs_shutdown,
.show_stats = xfs_fs_show_stats,
+ .report_error = xfs_fs_report_error,
};
static int
@@ -1548,6 +1566,8 @@ xfs_fs_parse_param(
return -EINVAL;
}
return 0;
+ case Opt_errortag:
+ return xfs_errortag_add_name(parsing_mp, param->string);
default:
xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
return -EINVAL;
@@ -1806,8 +1826,6 @@ xfs_fs_fill_super(
error = -ENOSYS;
goto out_free_sb;
}
-
- xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LBS);
}
/* Ensure this filesystem fits in the page cache limits */
@@ -1893,8 +1911,6 @@ xfs_fs_fill_super(
goto out_filestream_unmount;
}
xfs_warn_experimental(mp, XFS_EXPERIMENTAL_ZONED);
- } else if (xfs_has_metadir(mp)) {
- xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR);
}
if (xfs_has_reflink(mp)) {
@@ -2143,6 +2159,8 @@ xfs_fs_reconfigure(
if (error)
return error;
+ xfs_errortag_copy(mp, new_mp);
+
/* Validate new max_atomic_write option before making other changes */
if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) {
error = xfs_set_max_atomic_write_opt(mp,
@@ -2229,6 +2247,14 @@ xfs_init_fs_context(
mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
if (!mp)
return -ENOMEM;
+#ifdef DEBUG
+ mp->m_errortag = kcalloc(XFS_ERRTAG_MAX, sizeof(*mp->m_errortag),
+ GFP_KERNEL);
+ if (!mp->m_errortag) {
+ kfree(mp);
+ return -ENOMEM;
+ }
+#endif
spin_lock_init(&mp->m_sb_lock);
for (i = 0; i < XG_TYPE_MAX; i++)
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 4252b07cd251..c4da624fb296 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -4,7 +4,7 @@
* Copyright (c) 2012-2013 Red Hat, Inc.
* All rights reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_fs.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index 9918f14b4874..7f32d282dc88 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -3,7 +3,7 @@
* Copyright (c) 2001-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_error.h"
static struct ctl_table_header *xfs_table_header;
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 7a5c5ef2db92..6c7909838234 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -4,7 +4,7 @@
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index a60556dbd172..912713a8a019 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -3,7 +3,7 @@
* Copyright (c) 2009, Christoph Hellwig
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_bit.h"
@@ -51,6 +51,11 @@
#include "xfs_rtgroup.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
+#include "xfs_health.h"
+#include "xfs_healthmon.h"
+#include "xfs_notify_failure.h"
+#include "xfs_file.h"
+#include <linux/fserror.h>
/*
* We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index f70afbf3cb19..813e5a9f57eb 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -103,6 +103,9 @@ struct xfs_refcount_intent;
struct xfs_metadir_update;
struct xfs_rtgroup;
struct xfs_open_zone;
+struct xfs_healthmon_event;
+struct xfs_healthmon;
+struct fserror_event;
#define XFS_ATTR_FILTER_FLAGS \
{ XFS_ATTR_ROOT, "ROOT" }, \
@@ -2410,6 +2413,7 @@ DEFINE_ATTR_EVENT(xfs_attr_sf_addname);
DEFINE_ATTR_EVENT(xfs_attr_sf_create);
DEFINE_ATTR_EVENT(xfs_attr_sf_lookup);
DEFINE_ATTR_EVENT(xfs_attr_sf_remove);
+DEFINE_ATTR_EVENT(xfs_attr_sf_replace);
DEFINE_ATTR_EVENT(xfs_attr_sf_to_leaf);
DEFINE_ATTR_EVENT(xfs_attr_leaf_add);
@@ -5906,6 +5910,515 @@ DEFINE_EVENT(xfs_freeblocks_resv_class, name, \
DEFINE_FREEBLOCKS_RESV_EVENT(xfs_freecounter_reserved);
DEFINE_FREEBLOCKS_RESV_EVENT(xfs_freecounter_enospc);
+TRACE_EVENT(xfs_healthmon_lost_event,
+ TP_PROTO(const struct xfs_healthmon *hm),
+ TP_ARGS(hm),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned long long, lost_prev)
+ ),
+ TP_fast_assign(
+ __entry->dev = hm->dev;
+ __entry->lost_prev = hm->lost_prev_event;
+ ),
+ TP_printk("dev %d:%d lost_prev %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->lost_prev)
+);
+
+#define XFS_HEALTHMON_FLAGS_STRINGS \
+ { XFS_HEALTH_MONITOR_VERBOSE, "verbose" }
+#define XFS_HEALTHMON_FMT_STRINGS \
+ { XFS_HEALTH_MONITOR_FMT_V0, "v0" }
+
+TRACE_EVENT(xfs_healthmon_create,
+ TP_PROTO(dev_t dev, u64 flags, u8 format),
+ TP_ARGS(dev, flags, format),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u64, flags)
+ __field(u8, format)
+ ),
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->flags = flags;
+ __entry->format = format;
+ ),
+ TP_printk("dev %d:%d flags %s format %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_flags(__entry->flags, "|", XFS_HEALTHMON_FLAGS_STRINGS),
+ __print_symbolic(__entry->format, XFS_HEALTHMON_FMT_STRINGS))
+);
+
+TRACE_EVENT(xfs_healthmon_copybuf,
+ TP_PROTO(const struct xfs_healthmon *hm, const struct iov_iter *iov),
+ TP_ARGS(hm, iov),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(size_t, bufsize)
+ __field(size_t, inpos)
+ __field(size_t, outpos)
+ __field(size_t, to_copy)
+ __field(size_t, iter_count)
+ ),
+ TP_fast_assign(
+ __entry->dev = hm->dev;
+ __entry->bufsize = hm->bufsize;
+ __entry->inpos = hm->bufhead;
+ __entry->outpos = hm->buftail;
+ if (hm->bufhead > hm->buftail)
+ __entry->to_copy = hm->bufhead - hm->buftail;
+ else
+ __entry->to_copy = 0;
+ __entry->iter_count = iov_iter_count(iov);
+ ),
+ TP_printk("dev %d:%d bufsize %zu in_pos %zu out_pos %zu to_copy %zu iter_count %zu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->bufsize,
+ __entry->inpos,
+ __entry->outpos,
+ __entry->to_copy,
+ __entry->iter_count)
+);
+
+DECLARE_EVENT_CLASS(xfs_healthmon_class,
+ TP_PROTO(const struct xfs_healthmon *hm),
+ TP_ARGS(hm),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, events)
+ __field(unsigned long long, lost_prev)
+ ),
+ TP_fast_assign(
+ __entry->dev = hm->dev;
+ __entry->events = hm->events;
+ __entry->lost_prev = hm->lost_prev_event;
+ ),
+ TP_printk("dev %d:%d events %u lost_prev? %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->events,
+ __entry->lost_prev)
+);
+#define DEFINE_HEALTHMON_EVENT(name) \
+DEFINE_EVENT(xfs_healthmon_class, name, \
+ TP_PROTO(const struct xfs_healthmon *hm), \
+ TP_ARGS(hm))
+DEFINE_HEALTHMON_EVENT(xfs_healthmon_read_start);
+DEFINE_HEALTHMON_EVENT(xfs_healthmon_read_finish);
+DEFINE_HEALTHMON_EVENT(xfs_healthmon_release);
+DEFINE_HEALTHMON_EVENT(xfs_healthmon_detach);
+DEFINE_HEALTHMON_EVENT(xfs_healthmon_report_unmount);
+
+#define XFS_HEALTHMON_TYPE_STRINGS \
+ { XFS_HEALTHMON_LOST, "lost" }, \
+ { XFS_HEALTHMON_UNMOUNT, "unmount" }, \
+ { XFS_HEALTHMON_SICK, "sick" }, \
+ { XFS_HEALTHMON_CORRUPT, "corrupt" }, \
+ { XFS_HEALTHMON_HEALTHY, "healthy" }, \
+ { XFS_HEALTHMON_SHUTDOWN, "shutdown" }
+
+#define XFS_HEALTHMON_DOMAIN_STRINGS \
+ { XFS_HEALTHMON_MOUNT, "mount" }, \
+ { XFS_HEALTHMON_FS, "fs" }, \
+ { XFS_HEALTHMON_AG, "ag" }, \
+ { XFS_HEALTHMON_INODE, "inode" }, \
+ { XFS_HEALTHMON_RTGROUP, "rtgroup" }
+
+TRACE_DEFINE_ENUM(XFS_HEALTHMON_LOST);
+TRACE_DEFINE_ENUM(XFS_HEALTHMON_SHUTDOWN);
+TRACE_DEFINE_ENUM(XFS_HEALTHMON_UNMOUNT);
+TRACE_DEFINE_ENUM(XFS_HEALTHMON_SICK);
+TRACE_DEFINE_ENUM(XFS_HEALTHMON_CORRUPT);
+TRACE_DEFINE_ENUM(XFS_HEALTHMON_HEALTHY);
+
+TRACE_DEFINE_ENUM(XFS_HEALTHMON_MOUNT);
+TRACE_DEFINE_ENUM(XFS_HEALTHMON_FS);
+TRACE_DEFINE_ENUM(XFS_HEALTHMON_AG);
+TRACE_DEFINE_ENUM(XFS_HEALTHMON_INODE);
+TRACE_DEFINE_ENUM(XFS_HEALTHMON_RTGROUP);
+
+DECLARE_EVENT_CLASS(xfs_healthmon_event_class,
+ TP_PROTO(const struct xfs_healthmon *hm,
+ const struct xfs_healthmon_event *event),
+ TP_ARGS(hm, event),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, type)
+ __field(unsigned int, domain)
+ __field(unsigned int, mask)
+ __field(unsigned long long, ino)
+ __field(unsigned int, gen)
+ __field(unsigned int, group)
+ __field(unsigned long long, offset)
+ __field(unsigned long long, length)
+ __field(unsigned long long, lostcount)
+ ),
+ TP_fast_assign(
+ __entry->dev = hm->dev;
+ __entry->type = event->type;
+ __entry->domain = event->domain;
+ __entry->mask = 0;
+ __entry->group = 0;
+ __entry->ino = 0;
+ __entry->gen = 0;
+ __entry->offset = 0;
+ __entry->length = 0;
+ __entry->lostcount = 0;
+ switch (__entry->domain) {
+ case XFS_HEALTHMON_MOUNT:
+ switch (__entry->type) {
+ case XFS_HEALTHMON_SHUTDOWN:
+ __entry->mask = event->flags;
+ break;
+ case XFS_HEALTHMON_LOST:
+ __entry->lostcount = event->lostcount;
+ break;
+ }
+ break;
+ case XFS_HEALTHMON_FS:
+ __entry->mask = event->fsmask;
+ break;
+ case XFS_HEALTHMON_AG:
+ case XFS_HEALTHMON_RTGROUP:
+ __entry->mask = event->grpmask;
+ __entry->group = event->group;
+ break;
+ case XFS_HEALTHMON_INODE:
+ __entry->mask = event->imask;
+ __entry->ino = event->ino;
+ __entry->gen = event->gen;
+ break;
+ case XFS_HEALTHMON_DATADEV:
+ case XFS_HEALTHMON_LOGDEV:
+ case XFS_HEALTHMON_RTDEV:
+ __entry->offset = event->daddr;
+ __entry->length = event->bbcount;
+ break;
+ case XFS_HEALTHMON_FILERANGE:
+ __entry->ino = event->fino;
+ __entry->gen = event->fgen;
+ __entry->offset = event->fpos;
+ __entry->length = event->flen;
+ break;
+ }
+ ),
+ TP_printk("dev %d:%d type %s domain %s mask 0x%x ino 0x%llx gen 0x%x offset 0x%llx len 0x%llx group 0x%x lost %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XFS_HEALTHMON_TYPE_STRINGS),
+ __print_symbolic(__entry->domain, XFS_HEALTHMON_DOMAIN_STRINGS),
+ __entry->mask,
+ __entry->ino,
+ __entry->gen,
+ __entry->offset,
+ __entry->length,
+ __entry->group,
+ __entry->lostcount)
+);
+#define DEFINE_HEALTHMONEVENT_EVENT(name) \
+DEFINE_EVENT(xfs_healthmon_event_class, name, \
+ TP_PROTO(const struct xfs_healthmon *hm, \
+ const struct xfs_healthmon_event *event), \
+ TP_ARGS(hm, event))
+DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_insert);
+DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_push);
+DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_pop);
+DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_format);
+DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_format_overflow);
+DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_drop);
+DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_merge);
+
+TRACE_EVENT(xfs_healthmon_report_fs,
+ TP_PROTO(const struct xfs_healthmon *hm,
+ unsigned int old_mask, unsigned int new_mask,
+ const struct xfs_healthmon_event *event),
+ TP_ARGS(hm, old_mask, new_mask, event),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, type)
+ __field(unsigned int, domain)
+ __field(unsigned int, old_mask)
+ __field(unsigned int, new_mask)
+ __field(unsigned int, fsmask)
+ ),
+ TP_fast_assign(
+ __entry->dev = hm->dev;
+ __entry->type = event->type;
+ __entry->domain = event->domain;
+ __entry->old_mask = old_mask;
+ __entry->new_mask = new_mask;
+ __entry->fsmask = event->fsmask;
+ ),
+ TP_printk("dev %d:%d type %s domain %s oldmask 0x%x newmask 0x%x fsmask 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XFS_HEALTHMON_TYPE_STRINGS),
+ __print_symbolic(__entry->domain, XFS_HEALTHMON_DOMAIN_STRINGS),
+ __entry->old_mask,
+ __entry->new_mask,
+ __entry->fsmask)
+);
+
+TRACE_EVENT(xfs_healthmon_report_group,
+ TP_PROTO(const struct xfs_healthmon *hm,
+ unsigned int old_mask, unsigned int new_mask,
+ const struct xfs_healthmon_event *event),
+ TP_ARGS(hm, old_mask, new_mask, event),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, type)
+ __field(unsigned int, domain)
+ __field(unsigned int, old_mask)
+ __field(unsigned int, new_mask)
+ __field(unsigned int, grpmask)
+ __field(unsigned int, group)
+ ),
+ TP_fast_assign(
+ __entry->dev = hm->dev;
+ __entry->type = event->type;
+ __entry->domain = event->domain;
+ __entry->old_mask = old_mask;
+ __entry->new_mask = new_mask;
+ __entry->grpmask = event->grpmask;
+ __entry->group = event->group;
+ ),
+ TP_printk("dev %d:%d type %s domain %s oldmask 0x%x newmask 0x%x grpmask 0x%x group 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XFS_HEALTHMON_TYPE_STRINGS),
+ __print_symbolic(__entry->domain, XFS_HEALTHMON_DOMAIN_STRINGS),
+ __entry->old_mask,
+ __entry->new_mask,
+ __entry->grpmask,
+ __entry->group)
+);
+
+TRACE_EVENT(xfs_healthmon_report_inode,
+ TP_PROTO(const struct xfs_healthmon *hm,
+ unsigned int old_mask, unsigned int new_mask,
+ const struct xfs_healthmon_event *event),
+ TP_ARGS(hm, old_mask, new_mask, event),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, type)
+ __field(unsigned int, domain)
+ __field(unsigned int, old_mask)
+ __field(unsigned int, new_mask)
+ __field(unsigned int, imask)
+ __field(unsigned long long, ino)
+ __field(unsigned int, gen)
+ ),
+ TP_fast_assign(
+ __entry->dev = hm->dev;
+ __entry->type = event->type;
+ __entry->domain = event->domain;
+ __entry->old_mask = old_mask;
+ __entry->new_mask = new_mask;
+ __entry->imask = event->imask;
+ __entry->ino = event->ino;
+ __entry->gen = event->gen;
+ ),
+ TP_printk("dev %d:%d type %s domain %s oldmask 0x%x newmask 0x%x imask 0x%x ino 0x%llx gen 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XFS_HEALTHMON_TYPE_STRINGS),
+ __print_symbolic(__entry->domain, XFS_HEALTHMON_DOMAIN_STRINGS),
+ __entry->old_mask,
+ __entry->new_mask,
+ __entry->imask,
+ __entry->ino,
+ __entry->gen)
+);
+
+TRACE_EVENT(xfs_healthmon_report_shutdown,
+ TP_PROTO(const struct xfs_healthmon *hm, uint32_t shutdown_flags),
+ TP_ARGS(hm, shutdown_flags),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(uint32_t, shutdown_flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = hm->dev;
+ __entry->shutdown_flags = shutdown_flags;
+ ),
+ TP_printk("dev %d:%d shutdown_flags %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_flags(__entry->shutdown_flags, "|", XFS_SHUTDOWN_STRINGS))
+);
+
+#define XFS_DEVICE_STRINGS \
+ { XFS_DEV_DATA, "datadev" }, \
+ { XFS_DEV_RT, "rtdev" }, \
+ { XFS_DEV_LOG, "logdev" }
+
+TRACE_DEFINE_ENUM(XFS_DEV_DATA);
+TRACE_DEFINE_ENUM(XFS_DEV_RT);
+TRACE_DEFINE_ENUM(XFS_DEV_LOG);
+
+TRACE_EVENT(xfs_healthmon_report_media,
+ TP_PROTO(const struct xfs_healthmon *hm, enum xfs_device fdev,
+ const struct xfs_healthmon_event *event),
+ TP_ARGS(hm, fdev, event),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, error_dev)
+ __field(uint64_t, daddr)
+ __field(uint64_t, bbcount)
+ ),
+ TP_fast_assign(
+ __entry->dev = hm->dev;
+ __entry->error_dev = fdev;
+ __entry->daddr = event->daddr;
+ __entry->bbcount = event->bbcount;
+ ),
+ TP_printk("dev %d:%d %s daddr 0x%llx bbcount 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->error_dev, XFS_DEVICE_STRINGS),
+ __entry->daddr,
+ __entry->bbcount)
+);
+
+#define FS_ERROR_STRINGS \
+ { FSERR_BUFFERED_READ, "buffered_read" }, \
+ { FSERR_BUFFERED_WRITE, "buffered_write" }, \
+ { FSERR_DIRECTIO_READ, "directio_read" }, \
+ { FSERR_DIRECTIO_WRITE, "directio_write" }, \
+ { FSERR_DATA_LOST, "data_lost" }, \
+ { FSERR_METADATA, "metadata" }
+
+TRACE_DEFINE_ENUM(FSERR_BUFFERED_READ);
+TRACE_DEFINE_ENUM(FSERR_BUFFERED_WRITE);
+TRACE_DEFINE_ENUM(FSERR_DIRECTIO_READ);
+TRACE_DEFINE_ENUM(FSERR_DIRECTIO_WRITE);
+TRACE_DEFINE_ENUM(FSERR_DATA_LOST);
+TRACE_DEFINE_ENUM(FSERR_METADATA);
+
+TRACE_EVENT(xfs_healthmon_report_file_ioerror,
+ TP_PROTO(const struct xfs_healthmon *hm,
+ const struct fserror_event *p),
+ TP_ARGS(hm, p),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, type)
+ __field(unsigned long long, ino)
+ __field(unsigned int, gen)
+ __field(long long, pos)
+ __field(unsigned long long, len)
+ __field(int, error)
+ ),
+ TP_fast_assign(
+ __entry->dev = hm->dev;
+ __entry->type = p->type;
+ __entry->ino = XFS_I(p->inode)->i_ino;
+ __entry->gen = p->inode->i_generation;
+ __entry->pos = p->pos;
+ __entry->len = p->len;
+ __entry->error = p->error;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx gen 0x%x op %s pos 0x%llx bytecount 0x%llx error %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->gen,
+ __print_symbolic(__entry->type, FS_ERROR_STRINGS),
+ __entry->pos,
+ __entry->len,
+ __entry->error)
+);
+
+TRACE_EVENT(xfs_verify_media,
+ TP_PROTO(const struct xfs_mount *mp, const struct xfs_verify_media *me,
+ dev_t fdev, xfs_daddr_t daddr, uint64_t bbcount,
+ const struct folio *folio),
+ TP_ARGS(mp, me, fdev, daddr, bbcount, folio),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, fdev)
+ __field(xfs_daddr_t, start_daddr)
+ __field(xfs_daddr_t, end_daddr)
+ __field(unsigned int, flags)
+ __field(xfs_daddr_t, daddr)
+ __field(uint64_t, bbcount)
+ __field(unsigned int, bufsize)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_ddev_targp->bt_dev;
+ __entry->fdev = fdev;
+ __entry->start_daddr = me->me_start_daddr;
+ __entry->end_daddr = me->me_end_daddr;
+ __entry->flags = me->me_flags;
+ __entry->daddr = daddr;
+ __entry->bbcount = bbcount;
+ __entry->bufsize = folio_size(folio);
+ ),
+ TP_printk("dev %d:%d fdev %d:%d start_daddr 0x%llx end_daddr 0x%llx flags 0x%x daddr 0x%llx bbcount 0x%llx bufsize 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->fdev), MINOR(__entry->fdev),
+ __entry->start_daddr,
+ __entry->end_daddr,
+ __entry->flags,
+ __entry->daddr,
+ __entry->bbcount,
+ __entry->bufsize)
+);
+
+TRACE_EVENT(xfs_verify_media_end,
+ TP_PROTO(const struct xfs_mount *mp, const struct xfs_verify_media *me,
+ dev_t fdev),
+ TP_ARGS(mp, me, fdev),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, fdev)
+ __field(xfs_daddr_t, start_daddr)
+ __field(xfs_daddr_t, end_daddr)
+ __field(int, ioerror)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_ddev_targp->bt_dev;
+ __entry->fdev = fdev;
+ __entry->start_daddr = me->me_start_daddr;
+ __entry->end_daddr = me->me_end_daddr;
+ __entry->ioerror = me->me_ioerror;
+ ),
+ TP_printk("dev %d:%d fdev %d:%d start_daddr 0x%llx end_daddr 0x%llx ioerror %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->fdev), MINOR(__entry->fdev),
+ __entry->start_daddr,
+ __entry->end_daddr,
+ __entry->ioerror)
+);
+
+TRACE_EVENT(xfs_verify_media_error,
+ TP_PROTO(const struct xfs_mount *mp, const struct xfs_verify_media *me,
+ dev_t fdev, xfs_daddr_t daddr, uint64_t bbcount,
+ blk_status_t status),
+ TP_ARGS(mp, me, fdev, daddr, bbcount, status),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, fdev)
+ __field(xfs_daddr_t, start_daddr)
+ __field(xfs_daddr_t, end_daddr)
+ __field(unsigned int, flags)
+ __field(xfs_daddr_t, daddr)
+ __field(uint64_t, bbcount)
+ __field(int, error)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_ddev_targp->bt_dev;
+ __entry->fdev = fdev;
+ __entry->start_daddr = me->me_start_daddr;
+ __entry->end_daddr = me->me_end_daddr;
+ __entry->flags = me->me_flags;
+ __entry->daddr = daddr;
+ __entry->bbcount = bbcount;
+ __entry->error = blk_status_to_errno(status);
+ ),
+ TP_printk("dev %d:%d fdev %d:%d start_daddr 0x%llx end_daddr 0x%llx flags 0x%x daddr 0x%llx bbcount 0x%llx error %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->fdev), MINOR(__entry->fdev),
+ __entry->start_daddr,
+ __entry->end_daddr,
+ __entry->flags,
+ __entry->daddr,
+ __entry->bbcount,
+ __entry->error)
+);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 474f5a04ec63..bcc470f56e46 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -4,7 +4,7 @@
* Copyright (C) 2010 Red Hat, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
@@ -124,8 +124,6 @@ xfs_trans_dup(
ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
tp->t_rtx_res = tp->t_rtx_res_used;
- xfs_trans_switch_context(tp, ntp);
-
/* move deferred ops over to the new tp */
xfs_defer_move(ntp, tp);
@@ -1043,6 +1041,12 @@ xfs_trans_roll(
* locked be logged in the prior and the next transactions.
*/
tp = *tpp;
+ /*
+ * __xfs_trans_commit cleared the NOFS flag by calling into
+ * xfs_trans_free. Set it again here before doing memory
+ * allocations.
+ */
+ xfs_trans_set_context(tp);
error = xfs_log_regrant(tp->t_mountp, tp->t_ticket);
if (error)
return error;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7fb860f645a3..eb83c5dac032 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -9,6 +9,7 @@
/* kernel only transaction subsystem defines */
struct xlog;
+struct xlog_format_buf;
struct xfs_buf;
struct xfs_buftarg;
struct xfs_efd_log_item;
@@ -70,7 +71,8 @@ struct xfs_log_item {
struct xfs_item_ops {
unsigned flags;
void (*iop_size)(struct xfs_log_item *, int *, int *);
- void (*iop_format)(struct xfs_log_item *, struct xfs_log_vec *);
+ void (*iop_format)(struct xfs_log_item *lip,
+ struct xlog_format_buf *lfb);
void (*iop_pin)(struct xfs_log_item *);
void (*iop_unpin)(struct xfs_log_item *, int remove);
uint64_t (*iop_sort)(struct xfs_log_item *lip);
@@ -278,13 +280,4 @@ xfs_trans_clear_context(
memalloc_nofs_restore(tp->t_pflags);
}
-static inline void
-xfs_trans_switch_context(
- struct xfs_trans *old_tp,
- struct xfs_trans *new_tp)
-{
- new_tp->t_pflags = old_tp->t_pflags;
- old_tp->t_pflags = 0;
-}
-
#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 38983c6777df..363d7f88c2c6 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -4,7 +4,7 @@
* Copyright (c) 2008 Dave Chinner
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 53af546c0b23..95db73a37e57 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index c842ce06acd6..eaf9de6e07fd 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -3,7 +3,7 @@
* Copyright (c) 2000-2002 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c
new file mode 100644
index 000000000000..069cd371619d
--- /dev/null
+++ b/fs/xfs/xfs_verify_media.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2026 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs_platform.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_bit.h"
+#include "xfs_btree.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_trans.h"
+#include "xfs_alloc.h"
+#include "xfs_ag.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_health.h"
+#include "xfs_healthmon.h"
+#include "xfs_trace.h"
+#include "xfs_verify_media.h"
+
+#include <linux/fserror.h>
+
+/*
+ * Extent of blocks within one group whose contents are considered lost.
+ * Passed as the private data of the rmap range query so that the callback
+ * can clip each reverse mapping to the damaged range.
+ */
+struct xfs_group_data_lost {
+ xfs_agblock_t startblock; /* first lost block (set from the rmap low key) */
+ xfs_extlen_t blockcount; /* number of lost blocks from startblock */
+};
+
+/*
+ * Reverse mapping query callback: turn one rmap record that intersects the
+ * lost range into a report against the inode that owns the blocks.
+ *
+ * Mappings that are not owned by an inode, and inodes that cannot be loaded,
+ * are silently skipped.  Damage to a bmap btree block marks the matching fork
+ * sick, damage to the attr fork marks the xattr structure sick, and damage to
+ * file data is reported through fserror after clipping the mapping to the
+ * lost range.  Always returns 0 so that the range query keeps going.
+ */
+static int
+xfs_verify_report_data_lost(
+	struct xfs_btree_cur		*cur,
+	const struct xfs_rmap_irec	*rec,
+	void				*data)
+{
+	const struct xfs_group_data_lost *range = data;
+	struct xfs_mount		*mp = cur->bc_mp;
+	struct xfs_inode		*ip;
+	xfs_agblock_t			range_end, rec_end;
+	xfs_fileoff_t			off;
+	xfs_extlen_t			len;
+
+	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner))
+		return 0;
+
+	/* Reporting is best effort; ignore inodes we cannot get hold of. */
+	if (xfs_iget(mp, cur->bc_tp, rec->rm_owner, 0, 0, &ip))
+		return 0;
+
+	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
+		/* A mapping btree block was hit; flag the fork it indexes. */
+		if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
+			xfs_bmap_mark_sick(ip, XFS_ATTR_FORK);
+		else
+			xfs_bmap_mark_sick(ip, XFS_DATA_FORK);
+	} else if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
+		xfs_inode_mark_sick(ip, XFS_SICK_INO_XATTR);
+	} else {
+		range_end = range->startblock + range->blockcount;
+		rec_end = rec->rm_startblock + rec->rm_blockcount;
+		off = rec->rm_offset;
+		len = rec->rm_blockcount;
+
+		/* Trim the part of the mapping in front of the lost range. */
+		if (range->startblock > rec->rm_startblock) {
+			xfs_agblock_t	skip;
+
+			skip = range->startblock - rec->rm_startblock;
+			off += skip;
+			len -= skip;
+		}
+
+		/* Trim the part of the mapping behind the lost range. */
+		if (rec_end > range_end)
+			len -= rec_end - range_end;
+
+		fserror_report_data_lost(VFS_I(ip), XFS_FSB_TO_B(mp, off),
+				XFS_FSB_TO_B(mp, len), GFP_NOFS);
+	}
+
+	xfs_irele(ip);
+	return 0;
+}
+
+/*
+ * Walk reverse mappings to look for all file data loss.
+ *
+ * @mp:    mount whose device reported the media error
+ * @type:  XG_TYPE_AG for the data device, XG_TYPE_RTG for the realtime device
+ * @daddr: first damaged sector
+ * @bblen: length of the damaged range in sectors
+ *
+ * Every group touched by [daddr, daddr + bblen) has its rmap btree queried
+ * for the damaged blocks and each hit is passed to
+ * xfs_verify_report_data_lost().  Reporting is best effort: the walk stops at
+ * the first failure, but the function always returns 0.
+ */
+static int
+xfs_verify_report_losses(
+	struct xfs_mount	*mp,
+	enum xfs_group_type	type,
+	xfs_daddr_t		daddr,
+	u64			bblen)
+{
+	struct xfs_group	*xg = NULL;
+	struct xfs_trans	*tp;
+	xfs_fsblock_t		start_bno, end_bno;
+	uint32_t		start_gno, end_gno;
+	int			error;
+
+	if (type == XG_TYPE_RTG) {
+		start_bno = xfs_daddr_to_rtb(mp, daddr);
+		end_bno = xfs_daddr_to_rtb(mp, daddr + bblen - 1);
+	} else {
+		start_bno = XFS_DADDR_TO_FSB(mp, daddr);
+		end_bno = XFS_DADDR_TO_FSB(mp, daddr + bblen - 1);
+	}
+
+	tp = xfs_trans_alloc_empty(mp);
+	start_gno = xfs_fsb_to_gno(mp, start_bno, type);
+	end_gno = xfs_fsb_to_gno(mp, end_bno, type);
+	while ((xg = xfs_group_next_range(mp, xg, start_gno, end_gno, type))) {
+		struct xfs_buf		*agf_bp = NULL;
+		struct xfs_rtgroup	*rtg = NULL;
+		struct xfs_btree_cur	*cur;
+		struct xfs_rmap_irec	ri_low = { };
+		struct xfs_rmap_irec	ri_high;
+		struct xfs_group_data_lost lost;
+
+		if (type == XG_TYPE_AG) {
+			struct xfs_perag	*pag = to_perag(xg);
+
+			error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
+			if (error) {
+				/*
+				 * The iterator handed us an active reference,
+				 * so leaving the loop early must drop it with
+				 * _rele rather than _put.
+				 */
+				xfs_perag_rele(pag);
+				break;
+			}
+
+			cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag);
+		} else {
+			rtg = to_rtg(xg);
+			xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
+			cur = xfs_rtrmapbt_init_cursor(tp, rtg);
+		}
+
+		/*
+		 * Set the rmap range from ri_low to ri_high, which represents
+		 * a [start, end] where we are looking for the files or
+		 * metadata.  Groups in the middle of the damaged range are
+		 * searched in full.
+		 */
+		memset(&ri_high, 0xFF, sizeof(ri_high));
+		if (xg->xg_gno == start_gno)
+			ri_low.rm_startblock =
+				xfs_fsb_to_gbno(mp, start_bno, type);
+		if (xg->xg_gno == end_gno)
+			ri_high.rm_startblock =
+				xfs_fsb_to_gbno(mp, end_bno, type);
+
+		/* Clamp the lost extent to the size of this group. */
+		lost.startblock = ri_low.rm_startblock;
+		lost.blockcount = min(xg->xg_block_count,
+				      ri_high.rm_startblock + 1) -
+				  ri_low.rm_startblock;
+
+		error = xfs_rmap_query_range(cur, &ri_low, &ri_high,
+				xfs_verify_report_data_lost, &lost);
+		xfs_btree_del_cursor(cur, error);
+		if (agf_bp)
+			xfs_trans_brelse(tp, agf_bp);
+		if (rtg)
+			xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
+		if (error) {
+			/* Same as above: release the iterator's active ref. */
+			xfs_group_rele(xg);
+			break;
+		}
+	}
+
+	xfs_trans_cancel(tp);
+	return 0;
+}
+
+/*
+ * Pick the size of each verification read.
+ *
+ * Large reads keep the per-command overhead low, so default to 1MB unless the
+ * caller asked for something smaller via me_max_io_size.  The result is never
+ * smaller than one logical block of the device and never larger than the
+ * range that is left to verify (@bbcount, in basic blocks).
+ */
+static unsigned int
+xfs_verify_iosize(
+	const struct xfs_verify_media	*me,
+	struct xfs_buftarg		*btp,
+	uint64_t			bbcount)
+{
+	unsigned int		lbasize = bdev_logical_block_size(btp->bt_bdev);
+	unsigned int		iosize;
+
+	BUILD_BUG_ON(BBSHIFT != SECTOR_SHIFT);
+	ASSERT(BBTOB(bbcount) >= lbasize);
+
+	iosize = min_not_zero(SZ_1M, me->me_max_io_size);
+	return clamp(iosize, lbasize, BBTOB(bbcount));
+}
+
+/*
+ * Get the biggest verification buffer we can.  Start at the order needed to
+ * hold @iosize and fall back to smaller orders without retrying hard; the
+ * last resort is a normal single page allocation.  Returns NULL only if that
+ * final allocation fails.
+ */
+static struct folio *
+xfs_verify_alloc_folio(
+	const unsigned int	iosize)
+{
+	unsigned int		order;
+
+	for (order = get_order(iosize); order > 0; order--) {
+		struct folio	*folio;
+
+		folio = folio_alloc(GFP_KERNEL | __GFP_NORETRY, order);
+		if (folio)
+			return folio;
+	}
+
+	return folio_alloc(GFP_KERNEL, 0);
+}
+
+/*
+ * Deal with a failed verification read of @bio_bbcount sectors at @daddr.
+ *
+ * The failure is always traced.  If nothing at all was verified before the
+ * failure, the error is handed back to the caller through me_ioerror.  Only
+ * protection, medium and generic I/O errors count as data loss; when the
+ * caller asked for reporting those are sent to healthmon and, on filesystems
+ * with reverse mappings, to the owners of the affected blocks.
+ */
+static void
+xfs_verify_media_error(
+	struct xfs_mount	*mp,
+	struct xfs_verify_media	*me,
+	struct xfs_buftarg	*btp,
+	xfs_daddr_t		daddr,
+	unsigned int		bio_bbcount,
+	blk_status_t		bio_status)
+{
+	bool			data_lost;
+
+	trace_xfs_verify_media_error(mp, me, btp->bt_bdev->bd_dev, daddr,
+			bio_bbcount, bio_status);
+
+	/* The very first read failed, so no progress was made at all. */
+	if (me->me_start_daddr == daddr)
+		me->me_ioerror = -blk_status_to_errno(bio_status);
+
+	/*
+	 * Anything other than these three statuses is (hopefully) transient
+	 * and is reported to neither healthmon nor fsnotify.
+	 */
+	data_lost = bio_status == BLK_STS_PROTECTION ||
+		    bio_status == BLK_STS_IOERR ||
+		    bio_status == BLK_STS_MEDIUM;
+	if (!data_lost || !(me->me_flags & XFS_VERIFY_MEDIA_REPORT))
+		return;
+
+	xfs_healthmon_report_media(mp, me->me_dev, daddr, bio_bbcount);
+
+	/* Finding the owners of the lost blocks needs reverse mappings. */
+	if (!xfs_has_rmapbt(mp))
+		return;
+
+	if (me->me_dev == XFS_DEV_DATA)
+		xfs_verify_report_losses(mp, XG_TYPE_AG, daddr, bio_bbcount);
+	else if (me->me_dev == XFS_DEV_RT)
+		xfs_verify_report_losses(mp, XG_TYPE_RTG, daddr, bio_bbcount);
+}
+
+/*
+ * Verify the media of an xfs device by submitting read requests to the disk.
+ *
+ * @mp: mount that owns the device
+ * @me: verification request; me_dev selects the device and
+ *      [me_start_daddr, me_end_daddr) the sector range
+ *
+ * On success me_start_daddr is advanced to the end of what was read
+ * successfully, me_end_daddr is clamped to the size of the device, and
+ * me_ioerror is set if the very first read failed.  A failed read ends the
+ * walk but is not an error of this function.
+ *
+ * Returns 0 on success, -ENODEV if the device does not exist (or the log
+ * shares the data device), -EINVAL for a misaligned range, -ENOMEM if no
+ * buffer could be allocated, or -EINTR if a fatal signal is pending.
+ */
+static int
+xfs_verify_media(
+	struct xfs_mount	*mp,
+	struct xfs_verify_media	*me)
+{
+	struct xfs_buftarg	*btp = NULL;
+	struct bio		*bio;
+	struct folio		*folio;
+	xfs_daddr_t		daddr;
+	uint64_t		bbcount;
+	int			error = 0;
+
+	me->me_ioerror = 0;
+
+	switch (me->me_dev) {
+	case XFS_DEV_DATA:
+		btp = mp->m_ddev_targp;
+		break;
+	case XFS_DEV_LOG:
+		/* Only an external log counts as a separate device. */
+		if (mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev)
+			btp = mp->m_logdev_targp;
+		break;
+	case XFS_DEV_RT:
+		btp = mp->m_rtdev_targp;
+		break;
+	}
+	if (!btp)
+		return -ENODEV;
+
+	/*
+	 * If the caller told us to verify beyond the end of the disk, tell the
+	 * user exactly where that was.
+	 */
+	if (me->me_end_daddr > btp->bt_nr_sectors)
+		me->me_end_daddr = btp->bt_nr_sectors;
+
+	/* start and end have to be aligned to the lba size */
+	if (!IS_ALIGNED(BBTOB(me->me_start_daddr | me->me_end_daddr),
+			bdev_logical_block_size(btp->bt_bdev)))
+		return -EINVAL;
+
+	/*
+	 * end_daddr is the exclusive end of the range, so if start_daddr
+	 * reaches there (or beyond), there's no work to be done.
+	 */
+	if (me->me_start_daddr >= me->me_end_daddr)
+		return 0;
+
+	/*
+	 * There are three ranges involved here:
+	 *
+	 *  - [me->me_start_daddr, me->me_end_daddr) is the range that the
+	 *    user wants to verify.  end_daddr can be beyond the end of the
+	 *    disk; we'll constrain it to the end if necessary.
+	 *
+	 *  - [daddr, me->me_end_daddr) is the range that we have not yet
+	 *    verified.  We update daddr after each successful read.
+	 *    me->me_start_daddr is set to daddr before returning.
+	 *
+	 *  - [daddr, daddr + bio_bbcount) is the range that we're currently
+	 *    verifying.
+	 */
+	daddr = me->me_start_daddr;
+	bbcount = min_t(sector_t, me->me_end_daddr, btp->bt_nr_sectors) -
+			me->me_start_daddr;
+
+	folio = xfs_verify_alloc_folio(xfs_verify_iosize(me, btp, bbcount));
+	if (!folio)
+		return -ENOMEM;
+
+	trace_xfs_verify_media(mp, me, btp->bt_bdev->bd_dev, daddr, bbcount,
+			folio);
+
+	bio = bio_alloc(btp->bt_bdev, 1, REQ_OP_READ, GFP_KERNEL);
+	if (!bio) {
+		error = -ENOMEM;
+		goto out_folio;
+	}
+
+	while (bbcount > 0) {
+		unsigned int	bio_bbcount;
+		blk_status_t	bio_status;
+
+		/* The same bio and folio are reused for every read. */
+		bio_reset(bio, btp->bt_bdev, REQ_OP_READ);
+		bio->bi_iter.bi_sector = daddr;
+		bio_add_folio_nofail(bio, folio,
+				min(bbcount << SECTOR_SHIFT, folio_size(folio)),
+				0);
+
+		/*
+		 * Save the length of the bio before we submit it, because we
+		 * need the original daddr and length for reporting IO errors
+		 * if the bio fails.
+		 */
+		bio_bbcount = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+		submit_bio_wait(bio);
+		bio_status = bio->bi_status;
+		if (bio_status != BLK_STS_OK) {
+			xfs_verify_media_error(mp, me, btp, daddr, bio_bbcount,
+					bio_status);
+			error = 0;
+			break;
+		}
+
+		daddr += bio_bbcount;
+		bbcount -= bio_bbcount;
+
+		if (bbcount == 0)
+			break;
+
+		if (me->me_rest_us) {
+			ktime_t	expires;
+
+			/*
+			 * Widen before scaling to nanoseconds so that a long
+			 * rest period cannot wrap if me_rest_us is a 32-bit
+			 * field.
+			 */
+			expires = ktime_add_ns(ktime_get(),
+					(u64)me->me_rest_us * NSEC_PER_USEC);
+			set_current_state(TASK_KILLABLE);
+			schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
+		}
+
+		if (fatal_signal_pending(current)) {
+			error = -EINTR;
+			break;
+		}
+
+		cond_resched();
+	}
+
+	bio_put(bio);
+out_folio:
+	folio_put(folio);
+
+	if (error)
+		return error;
+
+	/*
+	 * Advance start_daddr to the end of what we verified if there wasn't
+	 * an operational error.
+	 */
+	me->me_start_daddr = daddr;
+	trace_xfs_verify_media_end(mp, me, btp->bt_bdev->bd_dev);
+	return 0;
+}
+
+/*
+ * ioctl entry point for media verification.
+ *
+ * Requires CAP_SYS_ADMIN.  The request is copied in from userspace, checked
+ * for a zero pad field, known flags and a known device selector, executed,
+ * and copied back out so that the caller can see how far verification got.
+ */
+int
+xfs_ioc_verify_media(
+	struct file			*file,
+	struct xfs_verify_media __user	*arg)
+{
+	struct xfs_mount		*mp = XFS_I(file_inode(file))->i_mount;
+	struct xfs_verify_media		me;
+	int				error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&me, arg, sizeof(me)))
+		return -EFAULT;
+
+	if (me.me_pad || (me.me_flags & ~XFS_VERIFY_MEDIA_FLAGS))
+		return -EINVAL;
+
+	if (me.me_dev != XFS_DEV_DATA &&
+	    me.me_dev != XFS_DEV_LOG &&
+	    me.me_dev != XFS_DEV_RT)
+		return -EINVAL;
+
+	error = xfs_verify_media(mp, &me);
+	if (error)
+		return error;
+
+	return copy_to_user(arg, &me, sizeof(me)) ? -EFAULT : 0;
+}
diff --git a/fs/xfs/xfs_verify_media.h b/fs/xfs/xfs_verify_media.h
new file mode 100644
index 000000000000..dc6eee9c8863
--- /dev/null
+++ b/fs/xfs/xfs_verify_media.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2026 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_VERIFY_MEDIA_H__
+#define __XFS_VERIFY_MEDIA_H__
+
+struct xfs_verify_media;
+/*
+ * ioctl handler for media verification requests; arg points to the
+ * userspace copy of the request, which is read and written back.
+ */
+int xfs_ioc_verify_media(struct file *file,
+ struct xfs_verify_media __user *arg);
+
+#endif /* __XFS_VERIFY_MEDIA_H__ */
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index ac5cecec9aa1..a735f16d9cd8 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -4,7 +4,7 @@
* Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index bbcf21704ea0..b60952565737 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -3,7 +3,7 @@
* Copyright (c) 2023-2025 Christoph Hellwig.
* Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
@@ -408,31 +408,6 @@ xfs_zone_free_blocks(
return 0;
}
-static struct xfs_group *
-xfs_find_free_zone(
- struct xfs_mount *mp,
- unsigned long start,
- unsigned long end)
-{
- struct xfs_zone_info *zi = mp->m_zone_info;
- XA_STATE (xas, &mp->m_groups[XG_TYPE_RTG].xa, start);
- struct xfs_group *xg;
-
- xas_lock(&xas);
- xas_for_each_marked(&xas, xg, end, XFS_RTG_FREE)
- if (atomic_inc_not_zero(&xg->xg_active_ref))
- goto found;
- xas_unlock(&xas);
- return NULL;
-
-found:
- xas_clear_mark(&xas, XFS_RTG_FREE);
- atomic_dec(&zi->zi_nr_free_zones);
- zi->zi_free_zone_cursor = xg->xg_gno;
- xas_unlock(&xas);
- return xg;
-}
-
static struct xfs_open_zone *
xfs_init_open_zone(
struct xfs_rtgroup *rtg,
@@ -472,13 +447,25 @@ xfs_open_zone(
bool is_gc)
{
struct xfs_zone_info *zi = mp->m_zone_info;
+ XA_STATE (xas, &mp->m_groups[XG_TYPE_RTG].xa, 0);
struct xfs_group *xg;
- xg = xfs_find_free_zone(mp, zi->zi_free_zone_cursor, ULONG_MAX);
- if (!xg)
- xg = xfs_find_free_zone(mp, 0, zi->zi_free_zone_cursor);
- if (!xg)
- return NULL;
+ /*
+ * Pick the free zone with lowest index. Zones in the beginning of the
+ * address space typically provide higher bandwidth than those at the
+ * end of the address space on HDDs.
+ */
+ xas_lock(&xas);
+ xas_for_each_marked(&xas, xg, ULONG_MAX, XFS_RTG_FREE)
+ if (atomic_inc_not_zero(&xg->xg_active_ref))
+ goto found;
+ xas_unlock(&xas);
+ return NULL;
+
+found:
+ xas_clear_mark(&xas, XFS_RTG_FREE);
+ atomic_dec(&zi->zi_nr_free_zones);
+ xas_unlock(&xas);
set_current_state(TASK_RUNNING);
return xfs_init_open_zone(to_rtg(xg), 0, write_hint, is_gc);
@@ -976,46 +963,106 @@ xfs_free_open_zones(
}
struct xfs_init_zones {
- struct xfs_mount *mp;
+ uint32_t zone_size;
+ uint32_t zone_capacity;
uint64_t available;
uint64_t reclaimable;
};
+/*
+ * Work out where writing to a zone resumes.
+ *
+ * Sequential write required zones carry a hardware write pointer, which
+ * xfs_validate_blk_zone() returns to us.
+ *
+ * Conventional zones and conventional devices have no such thing, so the
+ * write pointer is estimated as the block following the highest block that
+ * the rmap knows about.  After a power loss this misses blocks whose data I/O
+ * completed without being recorded in the rmap, and it rewrites blocks if the
+ * most recently written ones were deleted again before unmount, but it is the
+ * best that can be done without hardware support.
+ */
+static int
+xfs_query_write_pointer(
+	struct xfs_init_zones	*iz,
+	struct xfs_rtgroup	*rtg,
+	xfs_rgblock_t		*write_pointer)
+{
+	struct xfs_mount	*mp = rtg_mount(rtg);
+	struct block_device	*bdev = mp->m_rtdev_targp->bt_bdev;
+	sector_t		start = xfs_gbno_to_daddr(&rtg->rtg_group, 0);
+	xfs_rgblock_t		highest_rgbno;
+
+	if (bdev_is_zoned(bdev)) {
+		struct blk_zone	zone = {};
+		int		error;
+
+		error = blkdev_get_zone_info(bdev, start, &zone);
+		if (error)
+			return error;
+
+		/* The hardware zone must start where the rtgroup starts. */
+		if (zone.start != start) {
+			xfs_warn(mp, "mismatched zone start: 0x%llx/0x%llx.",
+					zone.start, start);
+			return -EFSCORRUPTED;
+		}
+
+		if (!xfs_validate_blk_zone(mp, &zone, rtg_rgno(rtg),
+				iz->zone_size, iz->zone_capacity,
+				write_pointer))
+			return -EFSCORRUPTED;
+
+		/*
+		 * Anything but a conventional zone has a usable hardware
+		 * write pointer, which the validation above stored for us.
+		 * Conventional zones continue with the rmap estimate.
+		 */
+		if (zone.cond != BLK_ZONE_COND_NOT_WP)
+			return 0;
+	}
+
+	xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
+	highest_rgbno = xfs_rtrmap_highest_rgbno(rtg);
+	xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
+
+	/* No rmap records at all means the zone is empty. */
+	*write_pointer = (highest_rgbno == NULLRGBLOCK) ? 0 : highest_rgbno + 1;
+	return 0;
+}
+
static int
xfs_init_zone(
struct xfs_init_zones *iz,
struct xfs_rtgroup *rtg,
- struct blk_zone *zone)
+ xfs_rgblock_t write_pointer)
{
struct xfs_mount *mp = rtg_mount(rtg);
struct xfs_zone_info *zi = mp->m_zone_info;
uint32_t used = rtg_rmap(rtg)->i_used_blocks;
- xfs_rgblock_t write_pointer, highest_rgbno;
int error;
- if (zone && !xfs_zone_validate(zone, rtg, &write_pointer))
+ if (write_pointer > rtg->rtg_extents) {
+ xfs_warn(mp, "zone %u has invalid write pointer (0x%x).",
+ rtg_rgno(rtg), write_pointer);
return -EFSCORRUPTED;
+ }
- /*
- * For sequential write required zones we retrieved the hardware write
- * pointer above.
- *
- * For conventional zones or conventional devices we don't have that
- * luxury. Instead query the rmap to find the highest recorded block
- * and set the write pointer to the block after that. In case of a
- * power loss this misses blocks where the data I/O has completed but
- * not recorded in the rmap yet, and it also rewrites blocks if the most
- * recently written ones got deleted again before unmount, but this is
- * the best we can do without hardware support.
- */
- if (!zone || zone->cond == BLK_ZONE_COND_NOT_WP) {
- xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
- highest_rgbno = xfs_rtrmap_highest_rgbno(rtg);
- if (highest_rgbno == NULLRGBLOCK)
- write_pointer = 0;
- else
- write_pointer = highest_rgbno + 1;
- xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
+ if (used > rtg->rtg_extents) {
+ xfs_warn(mp,
+"zone %u has used counter (0x%x) larger than zone capacity (0x%llx).",
+ rtg_rgno(rtg), used, rtg->rtg_extents);
+ return -EFSCORRUPTED;
+ }
+
+ if (used > write_pointer) {
+ xfs_warn(mp,
+"zone %u has used counter (0x%x) larger than write pointer (0x%x).",
+ rtg_rgno(rtg), used, write_pointer);
+ return -EFSCORRUPTED;
+ }
+
+ if (write_pointer == 0 && used != 0) {
+ xfs_warn(mp, "empty zone %u has non-zero used counter (0x%x).",
+ rtg_rgno(rtg), used);
+ return -EFSCORRUPTED;
}
/*
@@ -1056,35 +1103,6 @@ xfs_init_zone(
return 0;
}
-static int
-xfs_get_zone_info_cb(
- struct blk_zone *zone,
- unsigned int idx,
- void *data)
-{
- struct xfs_init_zones *iz = data;
- struct xfs_mount *mp = iz->mp;
- xfs_fsblock_t zsbno = xfs_daddr_to_rtb(mp, zone->start);
- xfs_rgnumber_t rgno;
- struct xfs_rtgroup *rtg;
- int error;
-
- if (xfs_rtb_to_rgbno(mp, zsbno) != 0) {
- xfs_warn(mp, "mismatched zone start 0x%llx.", zsbno);
- return -EFSCORRUPTED;
- }
-
- rgno = xfs_rtb_to_rgno(mp, zsbno);
- rtg = xfs_rtgroup_grab(mp, rgno);
- if (!rtg) {
- xfs_warn(mp, "realtime group not found for zone %u.", rgno);
- return -EFSCORRUPTED;
- }
- error = xfs_init_zone(iz, rtg, zone);
- xfs_rtgroup_rele(rtg);
- return error;
-}
-
/*
* Calculate the max open zone limit based on the of number of backing zones
* available.
@@ -1219,13 +1237,13 @@ xfs_mount_zones(
struct xfs_mount *mp)
{
struct xfs_init_zones iz = {
- .mp = mp,
+ .zone_capacity = mp->m_groups[XG_TYPE_RTG].blocks,
+ .zone_size = xfs_rtgroup_raw_size(mp),
};
- struct xfs_buftarg *bt = mp->m_rtdev_targp;
- xfs_extlen_t zone_blocks = mp->m_groups[XG_TYPE_RTG].blocks;
+ struct xfs_rtgroup *rtg = NULL;
int error;
- if (!bt) {
+ if (!mp->m_rtdev_targp) {
xfs_notice(mp, "RT device missing.");
return -EINVAL;
}
@@ -1253,7 +1271,7 @@ xfs_mount_zones(
return -ENOMEM;
xfs_info(mp, "%u zones of %u blocks (%u max open zones)",
- mp->m_sb.sb_rgcount, zone_blocks, mp->m_max_open_zones);
+ mp->m_sb.sb_rgcount, iz.zone_capacity, mp->m_max_open_zones);
trace_xfs_zones_mount(mp);
/*
@@ -1277,24 +1295,18 @@ xfs_mount_zones(
* or beneficial.
*/
mp->m_super->s_min_writeback_pages =
- XFS_FSB_TO_B(mp, min(zone_blocks, XFS_MAX_BMBT_EXTLEN)) >>
+ XFS_FSB_TO_B(mp, min(iz.zone_capacity, XFS_MAX_BMBT_EXTLEN)) >>
PAGE_SHIFT;
- if (bdev_is_zoned(bt->bt_bdev)) {
- error = blkdev_report_zones_cached(bt->bt_bdev,
- XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart),
- mp->m_sb.sb_rgcount, xfs_get_zone_info_cb, &iz);
- if (error < 0)
+ while ((rtg = xfs_rtgroup_next(mp, rtg))) {
+ xfs_rgblock_t write_pointer;
+
+ error = xfs_query_write_pointer(&iz, rtg, &write_pointer);
+ if (!error)
+ error = xfs_init_zone(&iz, rtg, write_pointer);
+ if (error) {
+ xfs_rtgroup_rele(rtg);
goto out_free_zone_info;
- } else {
- struct xfs_rtgroup *rtg = NULL;
-
- while ((rtg = xfs_rtgroup_next(mp, rtg))) {
- error = xfs_init_zone(&iz, rtg, NULL);
- if (error) {
- xfs_rtgroup_rele(rtg);
- goto out_free_zone_info;
- }
}
}
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index 3c52cc1497d4..1f1f9fc973af 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -3,7 +3,7 @@
* Copyright (c) 2023-2025 Christoph Hellwig.
* Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
@@ -16,6 +16,8 @@
#include "xfs_rmap.h"
#include "xfs_rtbitmap.h"
#include "xfs_rtrmap_btree.h"
+#include "xfs_errortag.h"
+#include "xfs_error.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"
@@ -50,23 +52,11 @@
*/
/*
- * Size of each GC scratch pad. This is also the upper bound for each
- * GC I/O, which helps to keep latency down.
+ * Size of each GC scratch allocation, and the number of buffers.
*/
-#define XFS_GC_CHUNK_SIZE SZ_1M
-
-/*
- * Scratchpad data to read GCed data into.
- *
- * The offset member tracks where the next allocation starts, and freed tracks
- * the amount of space that is not used anymore.
- */
-#define XFS_ZONE_GC_NR_SCRATCH 2
-struct xfs_zone_scratch {
- struct folio *folio;
- unsigned int offset;
- unsigned int freed;
-};
+#define XFS_GC_BUF_SIZE SZ_1M
+#define XFS_GC_NR_BUFS 2
+static_assert(XFS_GC_NR_BUFS < BIO_MAX_VECS);
/*
* Chunk that is read and written for each GC operation.
@@ -141,10 +131,17 @@ struct xfs_zone_gc_data {
struct bio_set bio_set;
/*
- * Scratchpad used, and index to indicated which one is used.
+ * Scratchpad to buffer GC data, organized as a ring buffer over
+ * discontiguous folios. scratch_head is where the buffer is filled,
+ * scratch_tail tracks the buffer space freed, and scratch_available
+ * counts the space available in the ring buffer between the head and
+ * the tail.
*/
- struct xfs_zone_scratch scratch[XFS_ZONE_GC_NR_SCRATCH];
- unsigned int scratch_idx;
+ struct folio *scratch_folios[XFS_GC_NR_BUFS];
+ unsigned int scratch_size;
+ unsigned int scratch_available;
+ unsigned int scratch_head;
+ unsigned int scratch_tail;
/*
* List of bios currently being read, written and reset.
@@ -210,20 +207,17 @@ xfs_zone_gc_data_alloc(
if (!data->iter.recs)
goto out_free_data;
- /*
- * We actually only need a single bio_vec. It would be nice to have
- * a flag that only allocates the inline bvecs and not the separate
- * bvec pool.
- */
if (bioset_init(&data->bio_set, 16, offsetof(struct xfs_gc_bio, bio),
BIOSET_NEED_BVECS))
goto out_free_recs;
- for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++) {
- data->scratch[i].folio =
- folio_alloc(GFP_KERNEL, get_order(XFS_GC_CHUNK_SIZE));
- if (!data->scratch[i].folio)
+ for (i = 0; i < XFS_GC_NR_BUFS; i++) {
+ data->scratch_folios[i] =
+ folio_alloc(GFP_KERNEL, get_order(XFS_GC_BUF_SIZE));
+ if (!data->scratch_folios[i])
goto out_free_scratch;
}
+ data->scratch_size = XFS_GC_BUF_SIZE * XFS_GC_NR_BUFS;
+ data->scratch_available = data->scratch_size;
INIT_LIST_HEAD(&data->reading);
INIT_LIST_HEAD(&data->writing);
INIT_LIST_HEAD(&data->resetting);
@@ -232,7 +226,7 @@ xfs_zone_gc_data_alloc(
out_free_scratch:
while (--i >= 0)
- folio_put(data->scratch[i].folio);
+ folio_put(data->scratch_folios[i]);
bioset_exit(&data->bio_set);
out_free_recs:
kfree(data->iter.recs);
@@ -247,8 +241,8 @@ xfs_zone_gc_data_free(
{
int i;
- for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++)
- folio_put(data->scratch[i].folio);
+ for (i = 0; i < XFS_GC_NR_BUFS; i++)
+ folio_put(data->scratch_folios[i]);
bioset_exit(&data->bio_set);
kfree(data->iter.recs);
kfree(data);
@@ -586,26 +580,6 @@ xfs_zone_gc_ensure_target(
return oz;
}
-static unsigned int
-xfs_zone_gc_scratch_available(
- struct xfs_zone_gc_data *data)
-{
- return XFS_GC_CHUNK_SIZE - data->scratch[data->scratch_idx].offset;
-}
-
-static bool
-xfs_zone_gc_space_available(
- struct xfs_zone_gc_data *data)
-{
- struct xfs_open_zone *oz;
-
- oz = xfs_zone_gc_ensure_target(data->mp);
- if (!oz)
- return false;
- return oz->oz_allocated < rtg_blocks(oz->oz_rtg) &&
- xfs_zone_gc_scratch_available(data);
-}
-
static void
xfs_zone_gc_end_io(
struct bio *bio)
@@ -632,8 +606,7 @@ xfs_zone_gc_alloc_blocks(
if (!oz)
return NULL;
- *count_fsb = min(*count_fsb,
- XFS_B_TO_FSB(mp, xfs_zone_gc_scratch_available(data)));
+ *count_fsb = min(*count_fsb, XFS_B_TO_FSB(mp, data->scratch_available));
/*
* Directly allocate GC blocks from the reserved pool.
@@ -664,6 +637,28 @@ xfs_zone_gc_alloc_blocks(
return oz;
}
+/*
+ * Attach chunk->len bytes of scratch space to the chunk's bio, starting at
+ * the current head of the scratch ring buffer.  The ring is made up of
+ * separate folios of XFS_GC_BUF_SIZE bytes each, so the range is added one
+ * folio-sized piece at a time and wraps back to the first folio at the end.
+ */
+static void
+xfs_zone_gc_add_data(
+	struct xfs_gc_bio	*chunk)
+{
+	struct xfs_zone_gc_data	*data = chunk->data;
+	unsigned int		remaining = chunk->len;
+	unsigned int		pos = data->scratch_head;
+
+	do {
+		struct folio	*folio =
+			data->scratch_folios[pos / XFS_GC_BUF_SIZE];
+		unsigned int	folio_off = pos % XFS_GC_BUF_SIZE;
+		unsigned int	seg = min(remaining,
+					  XFS_GC_BUF_SIZE - folio_off);
+
+		bio_add_folio_nofail(&chunk->bio, folio, seg, folio_off);
+		remaining -= seg;
+		pos += seg;
+
+		/* Wrap around at the end of the ring. */
+		if (pos == data->scratch_size)
+			pos = 0;
+	} while (remaining);
+}
+
static bool
xfs_zone_gc_start_chunk(
struct xfs_zone_gc_data *data)
@@ -677,6 +672,7 @@ xfs_zone_gc_start_chunk(
struct xfs_inode *ip;
struct bio *bio;
xfs_daddr_t daddr;
+ unsigned int len;
bool is_seq;
if (xfs_is_shutdown(mp))
@@ -691,17 +687,19 @@ xfs_zone_gc_start_chunk(
return false;
}
- bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ, GFP_NOFS, &data->bio_set);
+ len = XFS_FSB_TO_B(mp, irec.rm_blockcount);
+ bio = bio_alloc_bioset(bdev,
+ min(howmany(len, XFS_GC_BUF_SIZE) + 1, XFS_GC_NR_BUFS),
+ REQ_OP_READ, GFP_NOFS, &data->bio_set);
chunk = container_of(bio, struct xfs_gc_bio, bio);
chunk->ip = ip;
chunk->offset = XFS_FSB_TO_B(mp, irec.rm_offset);
- chunk->len = XFS_FSB_TO_B(mp, irec.rm_blockcount);
+ chunk->len = len;
chunk->old_startblock =
xfs_rgbno_to_rtb(iter->victim_rtg, irec.rm_startblock);
chunk->new_daddr = daddr;
chunk->is_seq = is_seq;
- chunk->scratch = &data->scratch[data->scratch_idx];
chunk->data = data;
chunk->oz = oz;
chunk->victim_rtg = iter->victim_rtg;
@@ -710,13 +708,12 @@ xfs_zone_gc_start_chunk(
bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
bio->bi_end_io = xfs_zone_gc_end_io;
- bio_add_folio_nofail(bio, chunk->scratch->folio, chunk->len,
- chunk->scratch->offset);
- chunk->scratch->offset += chunk->len;
- if (chunk->scratch->offset == XFS_GC_CHUNK_SIZE) {
- data->scratch_idx =
- (data->scratch_idx + 1) % XFS_ZONE_GC_NR_SCRATCH;
- }
+ xfs_zone_gc_add_data(chunk);
+ data->scratch_head = (data->scratch_head + len) % data->scratch_size;
+ data->scratch_available -= len;
+
+ XFS_STATS_INC(mp, xs_gc_read_calls);
+
WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW);
list_add_tail(&chunk->entry, &data->reading);
xfs_zone_gc_iter_advance(iter, irec.rm_blockcount);
@@ -811,8 +808,6 @@ xfs_zone_gc_write_chunk(
{
struct xfs_zone_gc_data *data = chunk->data;
struct xfs_mount *mp = chunk->ip->i_mount;
- phys_addr_t bvec_paddr =
- bvec_phys(bio_first_bvec_all(&chunk->bio));
struct xfs_gc_bio *split_chunk;
if (chunk->bio.bi_status)
@@ -822,13 +817,13 @@ xfs_zone_gc_write_chunk(
return;
}
+ XFS_STATS_INC(mp, xs_gc_write_calls);
+ XFS_STATS_ADD(mp, xs_gc_bytes, chunk->len);
+
WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW);
list_move_tail(&chunk->entry, &data->writing);
- bio_reset(&chunk->bio, mp->m_rtdev_targp->bt_bdev, REQ_OP_WRITE);
- bio_add_folio_nofail(&chunk->bio, chunk->scratch->folio, chunk->len,
- offset_in_folio(chunk->scratch->folio, bvec_paddr));
-
+ bio_reuse(&chunk->bio, REQ_OP_WRITE);
while ((split_chunk = xfs_zone_gc_split_write(data, chunk)))
xfs_zone_gc_submit_write(data, split_chunk);
xfs_zone_gc_submit_write(data, chunk);
@@ -839,6 +834,7 @@ xfs_zone_gc_finish_chunk(
struct xfs_gc_bio *chunk)
{
uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
+ struct xfs_zone_gc_data *data = chunk->data;
struct xfs_inode *ip = chunk->ip;
struct xfs_mount *mp = ip->i_mount;
int error;
@@ -850,11 +846,9 @@ xfs_zone_gc_finish_chunk(
return;
}
- chunk->scratch->freed += chunk->len;
- if (chunk->scratch->freed == chunk->scratch->offset) {
- chunk->scratch->offset = 0;
- chunk->scratch->freed = 0;
- }
+ data->scratch_tail =
+ (data->scratch_tail + chunk->len) % data->scratch_size;
+ data->scratch_available += chunk->len;
/*
* Cycle through the iolock and wait for direct I/O and layouts to
@@ -906,39 +900,64 @@ out:
bio_put(&chunk->bio);
}
-static bool
-xfs_zone_gc_prepare_reset(
- struct bio *bio,
- struct xfs_rtgroup *rtg)
+static void
+xfs_submit_zone_reset_bio(
+ struct xfs_rtgroup *rtg,
+ struct bio *bio)
{
+ struct xfs_mount *mp = rtg_mount(rtg);
+
trace_xfs_zone_reset(rtg);
ASSERT(rtg_rmap(rtg)->i_used_blocks == 0);
+
+ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_ZONE_RESET)) {
+ bio_io_error(bio);
+ return;
+ }
+
+ XFS_STATS_INC(mp, xs_gc_zone_reset_calls);
+
bio->bi_iter.bi_sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0);
if (!bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) {
- if (!bdev_max_discard_sectors(bio->bi_bdev))
- return false;
- bio->bi_opf = REQ_OP_DISCARD | REQ_SYNC;
- bio->bi_iter.bi_size =
- XFS_FSB_TO_B(rtg_mount(rtg), rtg_blocks(rtg));
+ /*
+ * Also use the bio to drive the state machine when neither
+ * zone reset nor discard is supported to keep things simple.
+ */
+ if (!bdev_max_discard_sectors(bio->bi_bdev)) {
+ bio_endio(bio);
+ return;
+ }
+ bio->bi_opf &= ~REQ_OP_ZONE_RESET;
+ bio->bi_opf |= REQ_OP_DISCARD;
+ bio->bi_iter.bi_size = XFS_FSB_TO_B(mp, rtg_blocks(rtg));
}
- return true;
+ submit_bio(bio);
+}
+
+static void xfs_bio_wait_endio(struct bio *bio)
+{
+ complete(bio->bi_private);
}
int
xfs_zone_gc_reset_sync(
struct xfs_rtgroup *rtg)
{
- int error = 0;
+ DECLARE_COMPLETION_ONSTACK(done);
struct bio bio;
+ int error;
bio_init(&bio, rtg_mount(rtg)->m_rtdev_targp->bt_bdev, NULL, 0,
- REQ_OP_ZONE_RESET);
- if (xfs_zone_gc_prepare_reset(&bio, rtg))
- error = submit_bio_wait(&bio);
- bio_uninit(&bio);
+ REQ_OP_ZONE_RESET | REQ_SYNC);
+ bio.bi_private = &done;
+ bio.bi_end_io = xfs_bio_wait_endio;
+ xfs_submit_zone_reset_bio(rtg, &bio);
+ wait_for_completion_io(&done);
+ error = blk_status_to_errno(bio.bi_status);
+ bio_uninit(&bio);
return error;
}
@@ -973,15 +992,7 @@ xfs_zone_gc_reset_zones(
chunk->data = data;
WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW);
list_add_tail(&chunk->entry, &data->resetting);
-
- /*
- * Also use the bio to drive the state machine when neither
- * zone reset nor discard is supported to keep things simple.
- */
- if (xfs_zone_gc_prepare_reset(bio, rtg))
- submit_bio(bio);
- else
- bio_endio(bio);
+ xfs_submit_zone_reset_bio(rtg, bio);
} while (next);
}
@@ -989,9 +1000,15 @@ static bool
xfs_zone_gc_should_start_new_work(
struct xfs_zone_gc_data *data)
{
+ struct xfs_open_zone *oz;
+
if (xfs_is_shutdown(data->mp))
return false;
- if (!xfs_zone_gc_space_available(data))
+ if (!data->scratch_available)
+ return false;
+
+ oz = xfs_zone_gc_ensure_target(data->mp);
+ if (!oz || oz->oz_allocated == rtg_blocks(oz->oz_rtg))
return false;
if (!data->iter.victim_rtg) {
diff --git a/fs/xfs/xfs_zone_info.c b/fs/xfs/xfs_zone_info.c
index 07e30c596975..53eabbc3334c 100644
--- a/fs/xfs/xfs_zone_info.c
+++ b/fs/xfs/xfs_zone_info.c
@@ -3,7 +3,7 @@
* Copyright (c) 2023-2025 Christoph Hellwig.
* Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
diff --git a/fs/xfs/xfs_zone_priv.h b/fs/xfs/xfs_zone_priv.h
index ce7f0e2f4598..8fbf9a52964e 100644
--- a/fs/xfs/xfs_zone_priv.h
+++ b/fs/xfs/xfs_zone_priv.h
@@ -72,7 +72,6 @@ struct xfs_zone_info {
/*
* Free zone search cursor and number of free zones:
*/
- unsigned long zi_free_zone_cursor;
atomic_t zi_nr_free_zones;
/*
diff --git a/fs/xfs/xfs_zone_space_resv.c b/fs/xfs/xfs_zone_space_resv.c
index fc1a4d1ce10c..5c6e6ef627e4 100644
--- a/fs/xfs/xfs_zone_space_resv.c
+++ b/fs/xfs/xfs_zone_space_resv.c
@@ -3,7 +3,7 @@
* Copyright (c) 2023-2025 Christoph Hellwig.
* Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
*/
-#include "xfs.h"
+#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
diff --git a/include/linux/bio.h b/include/linux/bio.h
index c75a9b3672aa..6156f2d66d4a 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -414,6 +414,7 @@ static inline void bio_init_inline(struct bio *bio, struct block_device *bdev,
}
extern void bio_uninit(struct bio *);
void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf);
+void bio_reuse(struct bio *bio, blk_opf_t opf);
void bio_chain(struct bio *, struct bio *);
int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len,