summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Kconfig22
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c7
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c5
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c25
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c31
-rw-r--r--fs/xfs/libxfs/xfs_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c2
-rw-r--r--fs/xfs/libxfs/xfs_errortag.h114
-rw-r--r--fs/xfs/libxfs/xfs_exchmaps.c4
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c6
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c4
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c3
-rw-r--r--fs/xfs/libxfs/xfs_inode_util.c11
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h150
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h2
-rw-r--r--fs/xfs/libxfs/xfs_metafile.c2
-rw-r--r--fs/xfs/libxfs/xfs_ondisk.h2
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c7
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c2
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c2
-rw-r--r--fs/xfs/libxfs/xfs_sb.c9
-rw-r--r--fs/xfs/libxfs/xfs_zones.h7
-rw-r--r--fs/xfs/scrub/cow_repair.c4
-rw-r--r--fs/xfs/scrub/metapath.c12
-rw-r--r--fs/xfs/scrub/newbt.c9
-rw-r--r--fs/xfs/scrub/reap.c620
-rw-r--r--fs/xfs/scrub/repair.c2
-rw-r--r--fs/xfs/scrub/repair.h8
-rw-r--r--fs/xfs/scrub/symlink_repair.c2
-rw-r--r--fs/xfs/scrub/trace.c1
-rw-r--r--fs/xfs/scrub/trace.h45
-rw-r--r--fs/xfs/xfs_attr_item.c2
-rw-r--r--fs/xfs/xfs_buf.c46
-rw-r--r--fs/xfs/xfs_buf.h4
-rw-r--r--fs/xfs/xfs_buf_item_recover.c10
-rw-r--r--fs/xfs/xfs_error.c216
-rw-r--r--fs/xfs/xfs_error.h47
-rw-r--r--fs/xfs/xfs_extfree_item.c4
-rw-r--r--fs/xfs/xfs_extfree_item.h4
-rw-r--r--fs/xfs/xfs_file.c75
-rw-r--r--fs/xfs/xfs_globals.c2
-rw-r--r--fs/xfs/xfs_icache.c6
-rw-r--r--fs/xfs/xfs_inode.c119
-rw-r--r--fs/xfs/xfs_inode_item.c125
-rw-r--r--fs/xfs/xfs_inode_item.h10
-rw-r--r--fs/xfs/xfs_ioctl.c24
-rw-r--r--fs/xfs/xfs_iomap.c19
-rw-r--r--fs/xfs/xfs_iops.c14
-rw-r--r--fs/xfs/xfs_linux.h2
-rw-r--r--fs/xfs/xfs_log.c38
-rw-r--r--fs/xfs/xfs_log.h37
-rw-r--r--fs/xfs/xfs_log_priv.h4
-rw-r--r--fs/xfs/xfs_log_recover.c34
-rw-r--r--fs/xfs/xfs_mount.c13
-rw-r--r--fs/xfs/xfs_mount.h12
-rw-r--r--fs/xfs/xfs_mru_cache.c3
-rw-r--r--fs/xfs/xfs_notify_failure.c2
-rw-r--r--fs/xfs/xfs_super.c84
-rw-r--r--fs/xfs/xfs_sysctl.c29
-rw-r--r--fs/xfs/xfs_sysctl.h3
-rw-r--r--fs/xfs/xfs_trace.h2
-rw-r--r--fs/xfs/xfs_trans.c23
-rw-r--r--fs/xfs/xfs_trans_ail.c2
-rw-r--r--fs/xfs/xfs_zone_alloc.c120
65 files changed, 1248 insertions, 1011 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 065953475cf5..8930d5254e1d 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -25,7 +25,7 @@ config XFS_FS
config XFS_SUPPORT_V4
bool "Support deprecated V4 (crc=0) format"
depends on XFS_FS
- default y
+ default n
help
The V4 filesystem format lacks certain features that are supported
by the V5 format, such as metadata checksumming, strengthened
@@ -40,7 +40,7 @@ config XFS_SUPPORT_V4
filesystem is a V4 filesystem. If no such string is found, please
upgrade xfsprogs to the latest version and try again.
- This option will become default N in September 2025. Support for the
+ This option became default N in September 2025. Support for the
V4 format will be removed entirely in September 2030. Distributors
can say N here to withdraw support earlier.
@@ -50,7 +50,7 @@ config XFS_SUPPORT_V4
config XFS_SUPPORT_ASCII_CI
bool "Support deprecated case-insensitive ascii (ascii-ci=1) format"
depends on XFS_FS
- default y
+ default n
help
The ASCII case insensitivity filesystem feature only works correctly
on systems that have been coerced into using ISO 8859-1, and it does
@@ -67,7 +67,7 @@ config XFS_SUPPORT_ASCII_CI
filesystem is a case-insensitive filesystem. If no such string is
found, please upgrade xfsprogs to the latest version and try again.
- This option will become default N in September 2025. Support for the
+ This option became default N in September 2025. Support for the
feature will be removed entirely in September 2030. Distributors
can say N here to withdraw support earlier.
@@ -137,7 +137,7 @@ config XFS_BTREE_IN_MEM
config XFS_ONLINE_SCRUB
bool "XFS online metadata check support"
- default n
+ default y
depends on XFS_FS
depends on TMPFS && SHMEM
select XFS_LIVE_HOOKS
@@ -150,12 +150,8 @@ config XFS_ONLINE_SCRUB
advantage here is to look for problems proactively so that
they can be dealt with in a controlled manner.
- This feature is considered EXPERIMENTAL. Use with caution!
-
See the xfs_scrub man page in section 8 for additional information.
- If unsure, say N.
-
config XFS_ONLINE_SCRUB_STATS
bool "XFS online metadata check usage data collection"
default y
@@ -171,11 +167,9 @@ config XFS_ONLINE_SCRUB_STATS
Usage data are collected in /sys/kernel/debug/xfs/scrub.
- If unsure, say N.
-
config XFS_ONLINE_REPAIR
bool "XFS online metadata repair support"
- default n
+ default y
depends on XFS_FS && XFS_ONLINE_SCRUB
select XFS_BTREE_IN_MEM
help
@@ -186,12 +180,8 @@ config XFS_ONLINE_REPAIR
formatted with secondary metadata, such as reverse mappings and inode
parent pointers.
- This feature is considered EXPERIMENTAL. Use with caution!
-
See the xfs_scrub man page in section 8 for additional information.
- If unsure, say N.
-
config XFS_WARN
bool "XFS Verbose Warnings"
depends on XFS_FS && !XFS_DEBUG
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index fb79215a509d..8ac8230c3d3c 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -92,9 +92,8 @@ xfs_ag_resv_critical(
trace_xfs_ag_resv_critical(pag, type, avail);
/* Critically low if less than 10% or max btree height remains. */
- return XFS_TEST_ERROR(avail < orig / 10 ||
- avail < mp->m_agbtree_maxlevels,
- mp, XFS_ERRTAG_AG_RESV_CRITICAL);
+ return avail < orig / 10 || avail < mp->m_agbtree_maxlevels ||
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_AG_RESV_CRITICAL);
}
/*
@@ -203,7 +202,7 @@ __xfs_ag_resv_init(
return -EINVAL;
}
- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_AG_RESV_FAIL))
+ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_AG_RESV_FAIL))
error = -ENOSPC;
else
error = xfs_dec_fdblocks(mp, hidden_space, true);
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 000cc7f4a3ce..ad381c73abc4 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -3321,7 +3321,7 @@ xfs_agf_read_verify(
xfs_verifier_error(bp, -EFSBADCRC, __this_address);
else {
fa = xfs_agf_verify(bp);
- if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
+ if (fa || XFS_TEST_ERROR(mp, XFS_ERRTAG_ALLOC_READ_AGF))
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}
}
@@ -4019,8 +4019,7 @@ __xfs_free_extent(
ASSERT(len != 0);
ASSERT(type != XFS_AG_RESV_AGFL);
- if (XFS_TEST_ERROR(false, mp,
- XFS_ERRTAG_FREE_EXTENT))
+ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_FREE_EXTENT))
return -EIO;
error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index fddb55605e0c..91c1b30ebaab 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -667,12 +667,8 @@ xfs_attr_shortform_bytesfit(
/*
* For attr2 we can try to move the forkoff if there is space in the
- * literal area, but for the old format we are done if there is no
- * space in the fixed attribute fork.
+ * literal area.
*/
- if (!xfs_has_attr2(mp))
- return 0;
-
dsize = dp->i_df.if_bytes;
switch (dp->i_df.if_format) {
@@ -723,22 +719,16 @@ xfs_attr_shortform_bytesfit(
}
/*
- * Switch on the ATTR2 superblock bit (implies also FEATURES2) unless:
- * - noattr2 mount option is set,
- * - on-disk version bit says it is already set, or
- * - the attr2 mount option is not set to enable automatic upgrade from attr1.
+ * Switch on the ATTR2 superblock bit (implies also FEATURES2) unless
+ * the on-disk version bit says it is already set.
*/
STATIC void
xfs_sbversion_add_attr2(
struct xfs_mount *mp,
struct xfs_trans *tp)
{
- if (xfs_has_noattr2(mp))
- return;
if (mp->m_sb.sb_features2 & XFS_SB_VERSION2_ATTR2BIT)
return;
- if (!xfs_has_attr2(mp))
- return;
spin_lock(&mp->m_sb_lock);
xfs_add_attr2(mp);
@@ -889,7 +879,7 @@ xfs_attr_sf_removename(
/*
* Fix up the start offset of the attribute fork
*/
- if (totsize == sizeof(struct xfs_attr_sf_hdr) && xfs_has_attr2(mp) &&
+ if (totsize == sizeof(struct xfs_attr_sf_hdr) &&
(dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
!(args->op_flags & (XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE)) &&
!xfs_has_parent(mp)) {
@@ -900,7 +890,6 @@ xfs_attr_sf_removename(
ASSERT(dp->i_forkoff);
ASSERT(totsize > sizeof(struct xfs_attr_sf_hdr) ||
(args->op_flags & XFS_DA_OP_ADDNAME) ||
- !xfs_has_attr2(mp) ||
dp->i_df.if_format == XFS_DINODE_FMT_BTREE ||
xfs_has_parent(mp));
xfs_trans_log_inode(args->trans, dp,
@@ -1040,8 +1029,7 @@ xfs_attr_shortform_allfit(
bytes += xfs_attr_sf_entsize_byname(name_loc->namelen,
be16_to_cpu(name_loc->valuelen));
}
- if (xfs_has_attr2(dp->i_mount) &&
- (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
+ if ((dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
(bytes == sizeof(struct xfs_attr_sf_hdr)))
return -1;
return xfs_attr_shortform_bytesfit(dp, bytes);
@@ -1161,7 +1149,6 @@ xfs_attr3_leaf_to_shortform(
* this case.
*/
if (!(args->op_flags & XFS_DA_OP_REPLACE)) {
- ASSERT(xfs_has_attr2(dp->i_mount));
ASSERT(dp->i_df.if_format != XFS_DINODE_FMT_BTREE);
xfs_attr_fork_remove(dp, args->trans);
}
@@ -1225,7 +1212,7 @@ xfs_attr3_leaf_to_node(
trace_xfs_attr_leaf_to_node(args);
- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_ATTR_LEAF_TO_NODE)) {
+ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_ATTR_LEAF_TO_NODE)) {
error = -EIO;
goto out;
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index d954f9b8071f..53ef4b7e504d 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -997,8 +997,7 @@ xfs_bmap_add_attrfork_local(
static int
xfs_bmap_set_attrforkoff(
struct xfs_inode *ip,
- int size,
- int *version)
+ int size)
{
int default_size = xfs_default_attroffset(ip) >> 3;
@@ -1012,8 +1011,6 @@ xfs_bmap_set_attrforkoff(
ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
if (!ip->i_forkoff)
ip->i_forkoff = default_size;
- else if (xfs_has_attr2(ip->i_mount) && version)
- *version = 2;
break;
default:
ASSERT(0);
@@ -1035,7 +1032,6 @@ xfs_bmap_add_attrfork(
int rsvd) /* xact may use reserved blks */
{
struct xfs_mount *mp = tp->t_mountp;
- int version = 1; /* superblock attr version */
int logflags; /* logging flags */
int error; /* error return value */
@@ -1045,7 +1041,7 @@ xfs_bmap_add_attrfork(
ASSERT(!xfs_inode_has_attr_fork(ip));
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_bmap_set_attrforkoff(ip, size, &version);
+ error = xfs_bmap_set_attrforkoff(ip, size);
if (error)
return error;
@@ -1069,16 +1065,12 @@ xfs_bmap_add_attrfork(
xfs_trans_log_inode(tp, ip, logflags);
if (error)
return error;
- if (!xfs_has_attr(mp) ||
- (!xfs_has_attr2(mp) && version == 2)) {
+ if (!xfs_has_attr(mp)) {
bool log_sb = false;
spin_lock(&mp->m_sb_lock);
if (!xfs_has_attr(mp)) {
xfs_add_attr(mp);
- log_sb = true;
- }
- if (!xfs_has_attr2(mp) && version == 2) {
xfs_add_attr2(mp);
log_sb = true;
}
@@ -3662,8 +3654,7 @@ xfs_bmap_btalloc(
/* Trim the allocation back to the maximum an AG can fit. */
args.maxlen = min(ap->length, mp->m_ag_max_usable);
- if (unlikely(XFS_TEST_ERROR(false, mp,
- XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
+ if (unlikely(XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
error = xfs_bmap_exact_minlen_extent_alloc(ap, &args);
else if ((ap->datatype & XFS_ALLOC_USERDATA) &&
xfs_inode_is_filestream(ap->ip))
@@ -3849,7 +3840,7 @@ xfs_bmapi_read(
}
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
}
@@ -4200,7 +4191,7 @@ xfs_bmapi_write(
(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
}
@@ -4545,7 +4536,7 @@ xfs_bmapi_remap(
(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
}
@@ -5679,7 +5670,7 @@ xfs_bmap_collapse_extents(
int logflags = 0;
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
}
@@ -5795,7 +5786,7 @@ xfs_bmap_insert_extents(
int logflags = 0;
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
}
@@ -5900,7 +5891,7 @@ xfs_bmap_split_extent(
int i = 0;
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
}
@@ -6065,7 +6056,7 @@ xfs_bmap_finish_one(
trace_xfs_bmap_deferred(bi);
- if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE))
+ if (XFS_TEST_ERROR(tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE))
return -EIO;
switch (bi->bi_type) {
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index a61211d253f1..dbe9df8c3300 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -306,7 +306,7 @@ xfs_btree_check_block(
fa = __xfs_btree_check_block(cur, block, level, bp);
if (XFS_IS_CORRUPT(mp, fa != NULL) ||
- XFS_TEST_ERROR(false, mp, xfs_btree_block_errtag(cur))) {
+ XFS_TEST_ERROR(mp, xfs_btree_block_errtag(cur))) {
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
xfs_btree_mark_sick(cur);
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 723a0643b838..90f7fc219fcc 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -565,7 +565,7 @@ xfs_da3_split(
trace_xfs_da_split(state->args);
- if (XFS_TEST_ERROR(false, state->mp, XFS_ERRTAG_DA_LEAF_SPLIT))
+ if (XFS_TEST_ERROR(state->mp, XFS_ERRTAG_DA_LEAF_SPLIT))
return -EIO;
/*
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 1775abcfa04d..82a338458a51 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -223,7 +223,7 @@ xfs_dir_ino_validate(
bool ino_ok = xfs_verify_dir_ino(mp, ino);
if (XFS_IS_CORRUPT(mp, !ino_ok) ||
- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DIR_INO_VALIDATE)) {
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_DIR_INO_VALIDATE)) {
xfs_warn(mp, "Invalid inode number 0x%Lx",
(unsigned long long) ino);
return -EFSCORRUPTED;
diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
index a53c5d40e084..de840abc0bcd 100644
--- a/fs/xfs/libxfs/xfs_errortag.h
+++ b/fs/xfs/libxfs/xfs_errortag.h
@@ -4,14 +4,22 @@
* Copyright (C) 2017 Oracle.
* All Rights Reserved.
*/
-#ifndef __XFS_ERRORTAG_H_
+#if !defined(__XFS_ERRORTAG_H_) || defined(XFS_ERRTAG)
#define __XFS_ERRORTAG_H_
/*
- * error injection tags - the labels can be anything you want
- * but each tag should have its own unique number
+ * There are two ways to use this header file. The first way is to #include it
+ * bare, which will define all the XFS_ERRTAG_* error injection knobs for use
+ * with the XFS_TEST_ERROR macro. The second way is to enclose the #include
+ * with a #define for an XFS_ERRTAG macro, in which case the header will define
+ * an XFS_ERRTAGS macro that expands to invoke that XFS_ERRTAG macro for each
+ * defined error injection knob.
*/
+/*
+ * These are the actual error injection tags. The numbers should be consecutive
+ * because arrays are sized based on the maximum.
+ */
#define XFS_ERRTAG_NOERROR 0
#define XFS_ERRTAG_IFLUSH_1 1
#define XFS_ERRTAG_IFLUSH_2 2
@@ -71,49 +79,61 @@
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
*/
#define XFS_RANDOM_DEFAULT 100
-#define XFS_RANDOM_IFLUSH_1 XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IFLUSH_2 XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IFLUSH_3 XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IFLUSH_4 XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IFLUSH_5 XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IFLUSH_6 XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_DA_READ_BUF XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_BTREE_CHECK_LBLOCK (XFS_RANDOM_DEFAULT/4)
-#define XFS_RANDOM_BTREE_CHECK_SBLOCK XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_ALLOC_READ_AGF XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IALLOC_READ_AGI XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_ITOBP_INOTOBP XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IUNLINK XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IUNLINK_REMOVE XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_DIR_INO_VALIDATE XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_BULKSTAT_READ_CHUNK XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IODONE_IOERR (XFS_RANDOM_DEFAULT/10)
-#define XFS_RANDOM_STRATREAD_IOERR (XFS_RANDOM_DEFAULT/10)
-#define XFS_RANDOM_STRATCMPL_IOERR (XFS_RANDOM_DEFAULT/10)
-#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10)
-#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_FREE_EXTENT 1
-#define XFS_RANDOM_RMAP_FINISH_ONE 1
-#define XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE 1
-#define XFS_RANDOM_REFCOUNT_FINISH_ONE 1
-#define XFS_RANDOM_BMAP_FINISH_ONE 1
-#define XFS_RANDOM_AG_RESV_CRITICAL 4
-#define XFS_RANDOM_LOG_BAD_CRC 1
-#define XFS_RANDOM_LOG_ITEM_PIN 1
-#define XFS_RANDOM_BUF_LRU_REF 2
-#define XFS_RANDOM_FORCE_SCRUB_REPAIR 1
-#define XFS_RANDOM_FORCE_SUMMARY_RECALC 1
-#define XFS_RANDOM_IUNLINK_FALLBACK (XFS_RANDOM_DEFAULT/10)
-#define XFS_RANDOM_BUF_IOERROR XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_REDUCE_MAX_IEXTENTS 1
-#define XFS_RANDOM_BMAP_ALLOC_MINLEN_EXTENT 1
-#define XFS_RANDOM_AG_RESV_FAIL 1
-#define XFS_RANDOM_LARP 1
-#define XFS_RANDOM_DA_LEAF_SPLIT 1
-#define XFS_RANDOM_ATTR_LEAF_TO_NODE 1
-#define XFS_RANDOM_WB_DELAY_MS 3000
-#define XFS_RANDOM_WRITE_DELAY_MS 3000
-#define XFS_RANDOM_EXCHMAPS_FINISH_ONE 1
-#define XFS_RANDOM_METAFILE_RESV_CRITICAL 4
+
+/*
+ * Table of error injection knobs. The parameters to the XFS_ERRTAG macro are:
+ * 1. The XFS_ERRTAG_ flag but without the prefix;
+ * 2. The name of the sysfs knob; and
+ * 3. The default value for the knob.
+ */
+#ifdef XFS_ERRTAG
+# undef XFS_ERRTAGS
+# define XFS_ERRTAGS \
+XFS_ERRTAG(NOERROR, noerror, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(IFLUSH_1, iflush1, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(IFLUSH_2, iflush2, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(IFLUSH_3, iflush3, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(IFLUSH_4, iflush4, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(IFLUSH_5, iflush5, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(IFLUSH_6, iflush6, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(DA_READ_BUF, dareadbuf, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(BTREE_CHECK_LBLOCK, btree_chk_lblk, XFS_RANDOM_DEFAULT/4) \
+XFS_ERRTAG(BTREE_CHECK_SBLOCK, btree_chk_sblk, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(ALLOC_READ_AGF, readagf, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(IALLOC_READ_AGI, readagi, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(ITOBP_INOTOBP, itobp, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(IUNLINK, iunlink, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(IUNLINK_REMOVE, iunlinkrm, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(DIR_INO_VALIDATE, dirinovalid, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(BULKSTAT_READ_CHUNK, bulkstat, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(IODONE_IOERR, logiodone, XFS_RANDOM_DEFAULT/10) \
+XFS_ERRTAG(STRATREAD_IOERR, stratread, XFS_RANDOM_DEFAULT/10) \
+XFS_ERRTAG(STRATCMPL_IOERR, stratcmpl, XFS_RANDOM_DEFAULT/10) \
+XFS_ERRTAG(DIOWRITE_IOERR, diowrite, XFS_RANDOM_DEFAULT/10) \
+XFS_ERRTAG(BMAPIFORMAT, bmapifmt, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(FREE_EXTENT, free_extent, 1) \
+XFS_ERRTAG(RMAP_FINISH_ONE, rmap_finish_one, 1) \
+XFS_ERRTAG(REFCOUNT_CONTINUE_UPDATE, refcount_continue_update, 1) \
+XFS_ERRTAG(REFCOUNT_FINISH_ONE, refcount_finish_one, 1) \
+XFS_ERRTAG(BMAP_FINISH_ONE, bmap_finish_one, 1) \
+XFS_ERRTAG(AG_RESV_CRITICAL, ag_resv_critical, 4) \
+XFS_ERRTAG(LOG_BAD_CRC, log_bad_crc, 1) \
+XFS_ERRTAG(LOG_ITEM_PIN, log_item_pin, 1) \
+XFS_ERRTAG(BUF_LRU_REF, buf_lru_ref, 2) \
+XFS_ERRTAG(FORCE_SCRUB_REPAIR, force_repair, 1) \
+XFS_ERRTAG(FORCE_SUMMARY_RECALC, bad_summary, 1) \
+XFS_ERRTAG(IUNLINK_FALLBACK, iunlink_fallback, XFS_RANDOM_DEFAULT/10) \
+XFS_ERRTAG(BUF_IOERROR, buf_ioerror, XFS_RANDOM_DEFAULT) \
+XFS_ERRTAG(REDUCE_MAX_IEXTENTS, reduce_max_iextents, 1) \
+XFS_ERRTAG(BMAP_ALLOC_MINLEN_EXTENT, bmap_alloc_minlen_extent, 1) \
+XFS_ERRTAG(AG_RESV_FAIL, ag_resv_fail, 1) \
+XFS_ERRTAG(LARP, larp, 1) \
+XFS_ERRTAG(DA_LEAF_SPLIT, da_leaf_split, 1) \
+XFS_ERRTAG(ATTR_LEAF_TO_NODE, attr_leaf_to_node, 1) \
+XFS_ERRTAG(WB_DELAY_MS, wb_delay_ms, 3000) \
+XFS_ERRTAG(WRITE_DELAY_MS, write_delay_ms, 3000) \
+XFS_ERRTAG(EXCHMAPS_FINISH_ONE, exchmaps_finish_one, 1) \
+XFS_ERRTAG(METAFILE_RESV_CRITICAL, metafile_resv_crit, 4)
+#endif /* XFS_ERRTAG */
#endif /* __XFS_ERRORTAG_H_ */
diff --git a/fs/xfs/libxfs/xfs_exchmaps.c b/fs/xfs/libxfs/xfs_exchmaps.c
index 3f1d6a98c118..932ee4619e9e 100644
--- a/fs/xfs/libxfs/xfs_exchmaps.c
+++ b/fs/xfs/libxfs/xfs_exchmaps.c
@@ -616,7 +616,7 @@ xfs_exchmaps_finish_one(
return error;
}
- if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
+ if (XFS_TEST_ERROR(tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
return -EIO;
/* If we still have work to do, ask for a new transaction. */
@@ -882,7 +882,7 @@ xmi_ensure_delta_nextents(
&new_nextents))
return -EFBIG;
- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
+ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
new_nextents > 10)
return -EFBIG;
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 750111634d9f..d97295eaebe6 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2140,7 +2140,7 @@ xfs_difree_inobt(
* remove the chunk if the block size is large enough for multiple inode
* chunks (that might not be free).
*/
- if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
+ if (rec.ir_free == XFS_INOBT_ALL_FREE &&
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
xic->deleted = true;
xic->first_ino = xfs_agino_to_ino(pag, rec.ir_startino);
@@ -2286,7 +2286,7 @@ xfs_difree_finobt(
* enough for multiple chunks. Leave the finobt record to remain in sync
* with the inobt.
*/
- if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
+ if (rec.ir_free == XFS_INOBT_ALL_FREE &&
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
error = xfs_btree_delete(cur, &i);
if (error)
@@ -2706,7 +2706,7 @@ xfs_agi_read_verify(
xfs_verifier_error(bp, -EFSBADCRC, __this_address);
else {
fa = xfs_agi_verify(bp);
- if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI))
+ if (fa || XFS_TEST_ERROR(mp, XFS_ERRTAG_IALLOC_READ_AGI))
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}
}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index aa13fc00afd7..b1812b2c3cce 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -61,8 +61,8 @@ xfs_inode_buf_verify(
di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
xfs_dinode_good_version(mp, dip->di_version) &&
xfs_verify_agino_or_null(bp->b_pag, unlinked_ino);
- if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
- XFS_ERRTAG_ITOBP_INOTOBP))) {
+ if (unlikely(!di_ok ||
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_ITOBP_INOTOBP))) {
if (readahead) {
bp->b_flags &= ~XBF_DONE;
xfs_buf_ioerror(bp, -EIO);
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 4f99b90add55..1772d82f2d68 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -756,8 +756,7 @@ xfs_iext_count_extend(
if (nr_exts < ifp->if_nextents)
return -EFBIG;
- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
- nr_exts > 10)
+ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) && nr_exts > 10)
return -EFBIG;
if (nr_exts > xfs_iext_max_nextents(has_large, whichfork)) {
diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c
index 48fe49a5f050..309ce6dd5553 100644
--- a/fs/xfs/libxfs/xfs_inode_util.c
+++ b/fs/xfs/libxfs/xfs_inode_util.c
@@ -299,17 +299,6 @@ xfs_inode_init(
} else {
inode_init_owner(args->idmap, inode, dir, args->mode);
}
-
- /*
- * If the group ID of the new file does not match the effective
- * group ID or one of the supplementary group IDs, the S_ISGID
- * bit is cleared (and only if the irix_sgid_inherit
- * compatibility variable is set).
- */
- if (irix_sgid_inherit && (inode->i_mode & S_ISGID) &&
- !vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode)))
- inode->i_mode &= ~S_ISGID;
-
ip->i_projid = xfs_get_initial_prid(pip);
}
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 0d637c276db0..6c50cb2ece19 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -86,43 +86,6 @@ struct xfs_unmount_log_format {
uint32_t pad2; /* may as well make it 64 bits */
};
-/* Region types for iovec's i_type */
-#define XLOG_REG_TYPE_BFORMAT 1
-#define XLOG_REG_TYPE_BCHUNK 2
-#define XLOG_REG_TYPE_EFI_FORMAT 3
-#define XLOG_REG_TYPE_EFD_FORMAT 4
-#define XLOG_REG_TYPE_IFORMAT 5
-#define XLOG_REG_TYPE_ICORE 6
-#define XLOG_REG_TYPE_IEXT 7
-#define XLOG_REG_TYPE_IBROOT 8
-#define XLOG_REG_TYPE_ILOCAL 9
-#define XLOG_REG_TYPE_IATTR_EXT 10
-#define XLOG_REG_TYPE_IATTR_BROOT 11
-#define XLOG_REG_TYPE_IATTR_LOCAL 12
-#define XLOG_REG_TYPE_QFORMAT 13
-#define XLOG_REG_TYPE_DQUOT 14
-#define XLOG_REG_TYPE_QUOTAOFF 15
-#define XLOG_REG_TYPE_LRHEADER 16
-#define XLOG_REG_TYPE_UNMOUNT 17
-#define XLOG_REG_TYPE_COMMIT 18
-#define XLOG_REG_TYPE_TRANSHDR 19
-#define XLOG_REG_TYPE_ICREATE 20
-#define XLOG_REG_TYPE_RUI_FORMAT 21
-#define XLOG_REG_TYPE_RUD_FORMAT 22
-#define XLOG_REG_TYPE_CUI_FORMAT 23
-#define XLOG_REG_TYPE_CUD_FORMAT 24
-#define XLOG_REG_TYPE_BUI_FORMAT 25
-#define XLOG_REG_TYPE_BUD_FORMAT 26
-#define XLOG_REG_TYPE_ATTRI_FORMAT 27
-#define XLOG_REG_TYPE_ATTRD_FORMAT 28
-#define XLOG_REG_TYPE_ATTR_NAME 29
-#define XLOG_REG_TYPE_ATTR_VALUE 30
-#define XLOG_REG_TYPE_XMI_FORMAT 31
-#define XLOG_REG_TYPE_XMD_FORMAT 32
-#define XLOG_REG_TYPE_ATTR_NEWNAME 33
-#define XLOG_REG_TYPE_ATTR_NEWVALUE 34
-#define XLOG_REG_TYPE_MAX 34
-
/*
* Flags to log operation header
*
@@ -141,14 +104,13 @@ struct xfs_unmount_log_format {
#define XLOG_END_TRANS 0x10 /* End a continued transaction */
#define XLOG_UNMOUNT_TRANS 0x20 /* Unmount a filesystem transaction */
-
-typedef struct xlog_op_header {
+struct xlog_op_header {
__be32 oh_tid; /* transaction id of operation : 4 b */
__be32 oh_len; /* bytes in data region : 4 b */
__u8 oh_clientid; /* who sent me this : 1 b */
__u8 oh_flags; /* : 1 b */
__u16 oh_res2; /* 32 bit align : 2 b */
-} xlog_op_header_t;
+};
/* valid values for h_fmt */
#define XLOG_FMT_UNKNOWN 0
@@ -174,12 +136,40 @@ typedef struct xlog_rec_header {
__be32 h_prev_block; /* block number to previous LR : 4 */
__be32 h_num_logops; /* number of log operations in this LR : 4 */
__be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
- /* new fields */
+
+ /* fields added by the Linux port: */
__be32 h_fmt; /* format of log record : 4 */
uuid_t h_fs_uuid; /* uuid of FS : 16 */
+
+ /* fields added for log v2: */
__be32 h_size; /* iclog size : 4 */
+
+ /*
+ * When h_size was added for log v2 support, it caused the structure to
+ * have a different size on i386 vs all other architectures because the
+ * sum of the member sizes is not aligned to that of the largest
+ * __be64-sized member, and i386 has really odd struct alignment rules.
+ *
+ * Due to the way the log headers are laid out on-disk that alone is
+ * not a problem because the xlog_rec_header always sits alone in a
+ * BBSIZEs area, and the rest of that area is padded with zeroes.
+ * But xlog_cksum used to calculate the checksum based on the structure
+ * size, and thus gives different checksums for i386 vs the rest.
+ * We now do two checksum validation passes for both sizes to allow
+ * moving v5 file systems with unclean logs between i386 and other
+ * (little-endian) architectures.
+ */
+ __u32 h_pad0;
} xlog_rec_header_t;
+#ifdef __i386__
+#define XLOG_REC_SIZE offsetofend(struct xlog_rec_header, h_size)
+#define XLOG_REC_SIZE_OTHER sizeof(struct xlog_rec_header)
+#else
+#define XLOG_REC_SIZE sizeof(struct xlog_rec_header)
+#define XLOG_REC_SIZE_OTHER offsetofend(struct xlog_rec_header, h_size)
+#endif /* __i386__ */
+
typedef struct xlog_rec_ext_header {
__be32 xh_cycle; /* write cycle of log : 4 */
__be32 xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */
@@ -195,12 +185,11 @@ typedef union xlog_in_core2 {
} xlog_in_core_2_t;
/* not an on-disk structure, but needed by log recovery in userspace */
-typedef struct xfs_log_iovec {
+struct xfs_log_iovec {
void *i_addr; /* beginning address of region */
int i_len; /* length in bytes of region */
uint i_type; /* type of region */
-} xfs_log_iovec_t;
-
+};
/*
* Transaction Header definitions.
@@ -213,12 +202,12 @@ typedef struct xfs_log_iovec {
* Do not change the below structure without redoing the code in
* xlog_recover_add_to_trans() and xlog_recover_add_to_cont_trans().
*/
-typedef struct xfs_trans_header {
+struct xfs_trans_header {
uint th_magic; /* magic number */
uint th_type; /* transaction type */
int32_t th_tid; /* transaction id (unused) */
uint th_num_items; /* num items logged by trans */
-} xfs_trans_header_t;
+};
#define XFS_TRANS_HEADER_MAGIC 0x5452414e /* TRAN */
@@ -542,7 +531,7 @@ struct xfs_log_dinode {
#define __XFS_BLF_DATAMAP_SIZE ((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD)
#define XFS_BLF_DATAMAP_SIZE (__XFS_BLF_DATAMAP_SIZE + 1)
-typedef struct xfs_buf_log_format {
+struct xfs_buf_log_format {
unsigned short blf_type; /* buf log item type indicator */
unsigned short blf_size; /* size of this item */
unsigned short blf_flags; /* misc state */
@@ -550,7 +539,7 @@ typedef struct xfs_buf_log_format {
int64_t blf_blkno; /* starting blkno of this buf */
unsigned int blf_map_size; /* used size of data bitmap in words */
unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
-} xfs_buf_log_format_t;
+};
/*
* All buffers now need to tell recovery where the magic number
@@ -606,40 +595,41 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf)
/*
* EFI/EFD log format definitions
*/
-typedef struct xfs_extent {
+struct xfs_extent {
xfs_fsblock_t ext_start;
xfs_extlen_t ext_len;
-} xfs_extent_t;
+};
/*
- * Since an xfs_extent_t has types (start:64, len: 32)
- * there are different alignments on 32 bit and 64 bit kernels.
- * So we provide the different variants for use by a
- * conversion routine.
+ * Since the fields in struct xfs_extent add up to 96 bits, it has
+ * different alignments on i386 vs all other architectures, because i386
+ * does not pad structures to their natural alignment.
+ *
+ * Provide the different variants for use by a conversion routine.
*/
-typedef struct xfs_extent_32 {
+struct xfs_extent_32 {
uint64_t ext_start;
uint32_t ext_len;
-} __attribute__((packed)) xfs_extent_32_t;
+} __attribute__((packed));
-typedef struct xfs_extent_64 {
+struct xfs_extent_64 {
uint64_t ext_start;
uint32_t ext_len;
uint32_t ext_pad;
-} xfs_extent_64_t;
+};
/*
* This is the structure used to lay out an efi log item in the
* log. The efi_extents field is a variable size array whose
* size is given by efi_nextents.
*/
-typedef struct xfs_efi_log_format {
+struct xfs_efi_log_format {
uint16_t efi_type; /* efi log item type */
uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */
- xfs_extent_t efi_extents[]; /* array of extents to free */
-} xfs_efi_log_format_t;
+ struct xfs_extent efi_extents[]; /* array of extents to free */
+};
static inline size_t
xfs_efi_log_format_sizeof(
@@ -649,13 +639,13 @@ xfs_efi_log_format_sizeof(
nr * sizeof(struct xfs_extent);
}
-typedef struct xfs_efi_log_format_32 {
+struct xfs_efi_log_format_32 {
uint16_t efi_type; /* efi log item type */
uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */
- xfs_extent_32_t efi_extents[]; /* array of extents to free */
-} __attribute__((packed)) xfs_efi_log_format_32_t;
+ struct xfs_extent_32 efi_extents[]; /* array of extents to free */
+} __attribute__((packed));
static inline size_t
xfs_efi_log_format32_sizeof(
@@ -665,13 +655,13 @@ xfs_efi_log_format32_sizeof(
nr * sizeof(struct xfs_extent_32);
}
-typedef struct xfs_efi_log_format_64 {
+struct xfs_efi_log_format_64 {
uint16_t efi_type; /* efi log item type */
uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */
- xfs_extent_64_t efi_extents[]; /* array of extents to free */
-} xfs_efi_log_format_64_t;
+ struct xfs_extent_64 efi_extents[]; /* array of extents to free */
+};
static inline size_t
xfs_efi_log_format64_sizeof(
@@ -686,13 +676,13 @@ xfs_efi_log_format64_sizeof(
* log. The efd_extents array is a variable size array whose
* size is given by efd_nextents;
*/
-typedef struct xfs_efd_log_format {
+struct xfs_efd_log_format {
uint16_t efd_type; /* efd log item type */
uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */
- xfs_extent_t efd_extents[]; /* array of extents freed */
-} xfs_efd_log_format_t;
+ struct xfs_extent efd_extents[]; /* array of extents freed */
+};
static inline size_t
xfs_efd_log_format_sizeof(
@@ -702,13 +692,13 @@ xfs_efd_log_format_sizeof(
nr * sizeof(struct xfs_extent);
}
-typedef struct xfs_efd_log_format_32 {
+struct xfs_efd_log_format_32 {
uint16_t efd_type; /* efd log item type */
uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */
- xfs_extent_32_t efd_extents[]; /* array of extents freed */
-} __attribute__((packed)) xfs_efd_log_format_32_t;
+ struct xfs_extent_32 efd_extents[]; /* array of extents freed */
+} __attribute__((packed));
static inline size_t
xfs_efd_log_format32_sizeof(
@@ -718,13 +708,13 @@ xfs_efd_log_format32_sizeof(
nr * sizeof(struct xfs_extent_32);
}
-typedef struct xfs_efd_log_format_64 {
+struct xfs_efd_log_format_64 {
uint16_t efd_type; /* efd log item type */
uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */
- xfs_extent_64_t efd_extents[]; /* array of extents freed */
-} xfs_efd_log_format_64_t;
+ struct xfs_extent_64 efd_extents[]; /* array of extents freed */
+};
static inline size_t
xfs_efd_log_format64_sizeof(
@@ -957,14 +947,14 @@ struct xfs_xmd_log_format {
* The first two fields must be the type and size fitting into
* 32 bits : log_recovery code assumes that.
*/
-typedef struct xfs_dq_logformat {
+struct xfs_dq_logformat {
uint16_t qlf_type; /* dquot log item type */
uint16_t qlf_size; /* size of this item */
xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */
int64_t qlf_blkno; /* blkno of dquot buffer */
int32_t qlf_len; /* len of dquot buffer */
uint32_t qlf_boffset; /* off of dquot in buffer */
-} xfs_dq_logformat_t;
+};
/*
* log format struct for QUOTAOFF records.
@@ -974,12 +964,12 @@ typedef struct xfs_dq_logformat {
* to the first and ensures that the first logitem is taken out of the AIL
* only when the last one is securely committed.
*/
-typedef struct xfs_qoff_logformat {
+struct xfs_qoff_logformat {
unsigned short qf_type; /* quotaoff log item type */
unsigned short qf_size; /* size of this item */
unsigned int qf_flags; /* USR and/or GRP */
char qf_pad[12]; /* padding for future */
-} xfs_qoff_logformat_t;
+};
/*
* Disk quotas status in m_qflags, and also sb_qflags. 16 bits.
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 95de23095030..9e712e62369c 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -111,7 +111,7 @@ struct xlog_recover_item {
struct xlog_recover {
struct hlist_node r_list;
xlog_tid_t r_log_tid; /* log's transaction id */
- xfs_trans_header_t r_theader; /* trans header for partial */
+ struct xfs_trans_header r_theader; /* trans header for partial */
int r_state; /* not needed */
xfs_lsn_t r_lsn; /* xact lsn */
struct list_head r_itemq; /* q for items */
diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c
index 225923e463c4..b02e3d6c0868 100644
--- a/fs/xfs/libxfs/xfs_metafile.c
+++ b/fs/xfs/libxfs/xfs_metafile.c
@@ -121,7 +121,7 @@ xfs_metafile_resv_critical(
div_u64(mp->m_metafile_resv_target, 10)))
return true;
- return XFS_TEST_ERROR(false, mp, XFS_ERRTAG_METAFILE_RESV_CRITICAL);
+ return XFS_TEST_ERROR(mp, XFS_ERRTAG_METAFILE_RESV_CRITICAL);
}
/* Allocate a block from the metadata file's reservation. */
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index 5ed44fdf7491..7bfa3242e2c5 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -174,6 +174,8 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32);
XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xlog_rec_header, 328);
+ XFS_CHECK_STRUCT_SIZE(struct xlog_rec_ext_header, 260);
XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16);
XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16);
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 897784037483..2484dc9f6d7e 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1113,8 +1113,7 @@ xfs_refcount_still_have_space(
* refcount continue update "error" has been injected.
*/
if (cur->bc_refc.nr_ops > 2 &&
- XFS_TEST_ERROR(false, cur->bc_mp,
- XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
+ XFS_TEST_ERROR(cur->bc_mp, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
return false;
if (cur->bc_refc.nr_ops == 0)
@@ -1398,7 +1397,7 @@ xfs_refcount_finish_one(
trace_xfs_refcount_deferred(mp, ri);
- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
+ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
return -EIO;
/*
@@ -1511,7 +1510,7 @@ xfs_rtrefcount_finish_one(
trace_xfs_refcount_deferred(mp, ri);
- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
+ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
return -EIO;
/*
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 3cdf50563fec..83e0488ff773 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2690,7 +2690,7 @@ xfs_rmap_finish_one(
trace_xfs_rmap_deferred(mp, ri);
- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE))
+ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_RMAP_FINISH_ONE))
return -EIO;
/*
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 5057536e586c..618061d898d4 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1067,7 +1067,7 @@ xfs_rtfree_extent(
ASSERT(rbmip->i_itemp != NULL);
xfs_assert_ilocked(rbmip, XFS_ILOCK_EXCL);
- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FREE_EXTENT))
+ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_FREE_EXTENT))
return -EIO;
error = xfs_rtcheck_alloc_range(&args, start, len);
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 711e180f9ebb..cdd16dd805d7 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -142,8 +142,6 @@ xfs_sb_version_to_features(
if (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) {
if (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)
features |= XFS_FEAT_LAZYSBCOUNT;
- if (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT)
- features |= XFS_FEAT_ATTR2;
if (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT)
features |= XFS_FEAT_PROJID32;
if (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)
@@ -155,7 +153,7 @@ xfs_sb_version_to_features(
/* Always on V5 features */
features |= XFS_FEAT_ALIGN | XFS_FEAT_LOGV2 | XFS_FEAT_EXTFLG |
- XFS_FEAT_LAZYSBCOUNT | XFS_FEAT_ATTR2 | XFS_FEAT_PROJID32 |
+ XFS_FEAT_LAZYSBCOUNT | XFS_FEAT_PROJID32 |
XFS_FEAT_V3INODES | XFS_FEAT_CRC | XFS_FEAT_PQUOTINO;
/* Optional V5 features */
@@ -1524,7 +1522,8 @@ xfs_fs_geometry(
geo->version = XFS_FSOP_GEOM_VERSION;
geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
XFS_FSOP_GEOM_FLAGS_DIRV2 |
- XFS_FSOP_GEOM_FLAGS_EXTFLG;
+ XFS_FSOP_GEOM_FLAGS_EXTFLG |
+ XFS_FSOP_GEOM_FLAGS_ATTR2;
if (xfs_has_attr(mp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR;
if (xfs_has_quota(mp))
@@ -1537,8 +1536,6 @@ xfs_fs_geometry(
geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI;
if (xfs_has_lazysbcount(mp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB;
- if (xfs_has_attr2(mp))
- geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2;
if (xfs_has_projid32(mp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32;
if (xfs_has_crc(mp))
diff --git a/fs/xfs/libxfs/xfs_zones.h b/fs/xfs/libxfs/xfs_zones.h
index c4f1367b2cca..5fefd132e002 100644
--- a/fs/xfs/libxfs/xfs_zones.h
+++ b/fs/xfs/libxfs/xfs_zones.h
@@ -29,6 +29,13 @@ struct xfs_rtgroup;
#define XFS_OPEN_GC_ZONES 1U
#define XFS_MIN_OPEN_ZONES (XFS_OPEN_GC_ZONES + 1U)
+/*
+ * For zoned devices that do not have a limit on the number of open zones, and
+ * for regular devices using the zoned allocator, use the most common SMR disks
+ * limit (128) as the default limit on the number of open zones.
+ */
+#define XFS_DEFAULT_MAX_OPEN_ZONES 128
+
bool xfs_zone_validate(struct blk_zone *zone, struct xfs_rtgroup *rtg,
xfs_rgblock_t *write_pointer);
diff --git a/fs/xfs/scrub/cow_repair.c b/fs/xfs/scrub/cow_repair.c
index 38a246b8bf11..b2a83801412e 100644
--- a/fs/xfs/scrub/cow_repair.c
+++ b/fs/xfs/scrub/cow_repair.c
@@ -300,7 +300,7 @@ xrep_cow_find_bad(
* on the debugging knob, replace everything in the CoW fork.
*/
if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
- XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
+ XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
xc->irec.br_blockcount);
if (error)
@@ -385,7 +385,7 @@ xrep_cow_find_bad_rt(
* CoW fork and then scan for staging extents in the refcountbt.
*/
if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
- XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
+ XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
xc->irec.br_blockcount);
if (error)
diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c
index 14939d7de349..378ec7c8d38e 100644
--- a/fs/xfs/scrub/metapath.c
+++ b/fs/xfs/scrub/metapath.c
@@ -79,7 +79,7 @@ xchk_metapath_cleanup(
if (mpath->dp_ilock_flags)
xfs_iunlock(mpath->dp, mpath->dp_ilock_flags);
- kfree(mpath->path);
+ kfree_const(mpath->path);
}
/* Set up a metadir path scan. @path must be dynamically allocated. */
@@ -98,13 +98,13 @@ xchk_setup_metapath_scan(
error = xchk_install_live_inode(sc, ip);
if (error) {
- kfree(path);
+ kfree_const(path);
return error;
}
mpath = kzalloc(sizeof(struct xchk_metapath), XCHK_GFP_FLAGS);
if (!mpath) {
- kfree(path);
+ kfree_const(path);
return -ENOMEM;
}
@@ -132,7 +132,7 @@ xchk_setup_metapath_rtdir(
return -ENOENT;
return xchk_setup_metapath_scan(sc, sc->mp->m_metadirip,
- kasprintf(GFP_KERNEL, "rtgroups"), sc->mp->m_rtdirip);
+ kstrdup_const("rtgroups", GFP_KERNEL), sc->mp->m_rtdirip);
}
/* Scan a rtgroup inode under the /rtgroups directory. */
@@ -179,7 +179,7 @@ xchk_setup_metapath_quotadir(
return -ENOENT;
return xchk_setup_metapath_scan(sc, sc->mp->m_metadirip,
- kstrdup("quota", GFP_KERNEL), qi->qi_dirip);
+ kstrdup_const("quota", GFP_KERNEL), qi->qi_dirip);
}
/* Scan a quota inode under the /quota directory. */
@@ -212,7 +212,7 @@ xchk_setup_metapath_dqinode(
return -ENOENT;
return xchk_setup_metapath_scan(sc, qi->qi_dirip,
- kstrdup(xfs_dqinode_path(type), GFP_KERNEL), ip);
+ kstrdup_const(xfs_dqinode_path(type), GFP_KERNEL), ip);
}
#else
# define xchk_setup_metapath_quotadir(...) (-ENOENT)
diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c
index 1588ce971cb8..951ae8b71566 100644
--- a/fs/xfs/scrub/newbt.c
+++ b/fs/xfs/scrub/newbt.c
@@ -28,6 +28,15 @@
#include "scrub/newbt.h"
/*
+ * This is the maximum number of deferred extent freeing item extents (EFIs)
+ * that we'll attach to a transaction without rolling the transaction to avoid
+ * overrunning a tr_itruncate reservation. The newbt code should reserve
+ * exactly the correct number of blocks to rebuild the btree, so there should
+ * not be any excess blocks to free when committing a new btree.
+ */
+#define XREP_MAX_ITRUNCATE_EFIS (128)
+
+/*
* Estimate proper slack values for a btree that's being reloaded.
*
* Under most circumstances, we'll take whatever default loading value the
diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c
index 8703897c0a9c..07f5bb8a6421 100644
--- a/fs/xfs/scrub/reap.c
+++ b/fs/xfs/scrub/reap.c
@@ -36,6 +36,12 @@
#include "xfs_metafile.h"
#include "xfs_rtgroup.h"
#include "xfs_rtrmap_btree.h"
+#include "xfs_extfree_item.h"
+#include "xfs_rmap_item.h"
+#include "xfs_refcount_item.h"
+#include "xfs_buf_item.h"
+#include "xfs_bmap_item.h"
+#include "xfs_bmap_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -91,21 +97,33 @@
struct xreap_state {
struct xfs_scrub *sc;
- /* Reverse mapping owner and metadata reservation type. */
- const struct xfs_owner_info *oinfo;
- enum xfs_ag_resv_type resv;
+ union {
+ struct {
+ /*
+ * For AG blocks, this is reverse mapping owner and
+ * metadata reservation type.
+ */
+ const struct xfs_owner_info *oinfo;
+ enum xfs_ag_resv_type resv;
+ };
+ struct {
+ /* For file blocks, this is the inode and fork. */
+ struct xfs_inode *ip;
+ int whichfork;
+ };
+ };
- /* If true, roll the transaction before reaping the next extent. */
- bool force_roll;
+ /* Number of invalidated buffers logged to the current transaction. */
+ unsigned int nr_binval;
- /* Number of deferred reaps attached to the current transaction. */
- unsigned int deferred;
+ /* Maximum number of buffers we can invalidate in a single tx. */
+ unsigned int max_binval;
- /* Number of invalidated buffers logged to the current transaction. */
- unsigned int invalidated;
+ /* Number of deferred reaps attached to the current transaction. */
+ unsigned int nr_deferred;
- /* Number of deferred reaps queued during the whole reap sequence. */
- unsigned long long total_deferred;
+ /* Maximum number of intents we can reap in a single transaction. */
+ unsigned int max_deferred;
};
/* Put a block back on the AGFL. */
@@ -148,71 +166,79 @@ xreap_put_freelist(
}
/* Are there any uncommitted reap operations? */
-static inline bool xreap_dirty(const struct xreap_state *rs)
+static inline bool xreap_is_dirty(const struct xreap_state *rs)
{
- if (rs->force_roll)
- return true;
- if (rs->deferred)
- return true;
- if (rs->invalidated)
- return true;
- if (rs->total_deferred)
- return true;
- return false;
+ return rs->nr_binval > 0 || rs->nr_deferred > 0;
}
-#define XREAP_MAX_BINVAL (2048)
-
/*
- * Decide if we want to roll the transaction after reaping an extent. We don't
- * want to overrun the transaction reservation, so we prohibit more than
- * 128 EFIs per transaction. For the same reason, we limit the number
- * of buffer invalidations to 2048.
+ * Decide if we need to roll the transaction to clear out the log
+ * reservation that we allocated to buffer invalidations.
*/
-static inline bool xreap_want_roll(const struct xreap_state *rs)
+static inline bool xreap_want_binval_roll(const struct xreap_state *rs)
{
- if (rs->force_roll)
- return true;
- if (rs->deferred > XREP_MAX_ITRUNCATE_EFIS)
- return true;
- if (rs->invalidated > XREAP_MAX_BINVAL)
- return true;
- return false;
+ return rs->nr_binval >= rs->max_binval;
}
-static inline void xreap_reset(struct xreap_state *rs)
+/* Reset the buffer invalidation count after rolling. */
+static inline void xreap_binval_reset(struct xreap_state *rs)
{
- rs->total_deferred += rs->deferred;
- rs->deferred = 0;
- rs->invalidated = 0;
- rs->force_roll = false;
+ rs->nr_binval = 0;
}
-#define XREAP_MAX_DEFER_CHAIN (2048)
+/*
+ * Bump the number of invalidated buffers, and return true if we can continue,
+ * or false if we need to roll the transaction.
+ */
+static inline bool xreap_inc_binval(struct xreap_state *rs)
+{
+ rs->nr_binval++;
+ return rs->nr_binval < rs->max_binval;
+}
/*
* Decide if we want to finish the deferred ops that are attached to the scrub
* transaction. We don't want to queue huge chains of deferred ops because
* that can consume a lot of log space and kernel memory. Hence we trigger a
- * xfs_defer_finish if there are more than 2048 deferred reap operations or the
- * caller did some real work.
+ * xfs_defer_finish if there are too many deferred reap operations or we've run
+ * out of space for invalidations.
*/
-static inline bool
-xreap_want_defer_finish(const struct xreap_state *rs)
+static inline bool xreap_want_defer_finish(const struct xreap_state *rs)
{
- if (rs->force_roll)
- return true;
- if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN)
- return true;
- return false;
+ return rs->nr_deferred >= rs->max_deferred;
}
+/*
+ * Reset the defer chain length and buffer invalidation count after finishing
+ * items.
+ */
static inline void xreap_defer_finish_reset(struct xreap_state *rs)
{
- rs->total_deferred = 0;
- rs->deferred = 0;
- rs->invalidated = 0;
- rs->force_roll = false;
+ rs->nr_deferred = 0;
+ rs->nr_binval = 0;
+}
+
+/*
+ * Bump the number of deferred extent reaps.
+ */
+static inline void xreap_inc_defer(struct xreap_state *rs)
+{
+ rs->nr_deferred++;
+}
+
+/* Force the caller to finish a deferred item chain. */
+static inline void xreap_force_defer_finish(struct xreap_state *rs)
+{
+ rs->nr_deferred = rs->max_deferred;
+}
+
+/* Maximum number of fsblocks that we might find in a buffer to invalidate. */
+static inline unsigned int
+xrep_binval_max_fsblocks(
+ struct xfs_mount *mp)
+{
+ /* Remote xattr values are the largest buffers that we support. */
+ return xfs_attr3_max_rmt_blocks(mp);
}
/*
@@ -224,12 +250,8 @@ xrep_bufscan_max_sectors(
struct xfs_mount *mp,
xfs_extlen_t fsblocks)
{
- int max_fsbs;
-
- /* Remote xattr values are the largest buffers that we support. */
- max_fsbs = xfs_attr3_max_rmt_blocks(mp);
-
- return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks, max_fsbs));
+ return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks,
+ xrep_binval_max_fsblocks(mp)));
}
/*
@@ -297,14 +319,13 @@ xreap_agextent_binval(
while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
xfs_trans_bjoin(sc->tp, bp);
xfs_trans_binval(sc->tp, bp);
- rs->invalidated++;
/*
* Stop invalidating if we've hit the limit; we should
* still have enough reservation left to free however
* far we've gotten.
*/
- if (rs->invalidated > XREAP_MAX_BINVAL) {
+ if (!xreap_inc_binval(rs)) {
*aglenp -= agbno_next - bno;
goto out;
}
@@ -416,21 +437,23 @@ xreap_agextent_iter(
trace_xreap_dispose_unmap_extent(pag_group(sc->sa.pag), agbno,
*aglenp);
- rs->force_roll = true;
-
if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
/*
- * If we're unmapping CoW staging extents, remove the
+ * t0: Unmapping CoW staging extents, remove the
* records from the refcountbt, which will remove the
* rmap record as well.
*/
xfs_refcount_free_cow_extent(sc->tp, false, fsbno,
*aglenp);
+ xreap_inc_defer(rs);
return 0;
}
- return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
- *aglenp, rs->oinfo);
+ /* t1: unmap crosslinked metadata blocks */
+ xfs_rmap_free_extent(sc->tp, false, fsbno, *aglenp,
+ rs->oinfo->oi_owner);
+ xreap_inc_defer(rs);
+ return 0;
}
trace_xreap_dispose_free_extent(pag_group(sc->sa.pag), agbno, *aglenp);
@@ -443,12 +466,12 @@ xreap_agextent_iter(
*/
xreap_agextent_binval(rs, agbno, aglenp);
if (*aglenp == 0) {
- ASSERT(xreap_want_roll(rs));
+ ASSERT(xreap_want_binval_roll(rs));
return 0;
}
/*
- * If we're getting rid of CoW staging extents, use deferred work items
+ * t2: To get rid of CoW staging extents, use deferred work items
* to remove the refcountbt records (which removes the rmap records)
* and free the extent. We're not worried about the system going down
* here because log recovery walks the refcount btree to clean out the
@@ -463,23 +486,23 @@ xreap_agextent_iter(
if (error)
return error;
- rs->force_roll = true;
+ xreap_inc_defer(rs);
return 0;
}
- /* Put blocks back on the AGFL one at a time. */
+ /* t3: Put blocks back on the AGFL one at a time. */
if (rs->resv == XFS_AG_RESV_AGFL) {
ASSERT(*aglenp == 1);
error = xreap_put_freelist(sc, agbno);
if (error)
return error;
- rs->force_roll = true;
+ xreap_force_defer_finish(rs);
return 0;
}
/*
- * Use deferred frees to get rid of the old btree blocks to try to
+ * t4: Use deferred frees to get rid of the old btree blocks to try to
* minimize the window in which we could crash and lose the old blocks.
* Add a defer ops barrier every other extent to avoid stressing the
* system with large EFIs.
@@ -489,12 +512,194 @@ xreap_agextent_iter(
if (error)
return error;
- rs->deferred++;
- if (rs->deferred % 2 == 0)
+ xreap_inc_defer(rs);
+ if (rs->nr_deferred % 2 == 0)
xfs_defer_add_barrier(sc->tp);
return 0;
}
+/* Configure the deferral and invalidation limits */
+static inline void
+xreap_configure_limits(
+ struct xreap_state *rs,
+ unsigned int fixed_overhead,
+ unsigned int variable_overhead,
+ unsigned int per_intent,
+ unsigned int per_binval)
+{
+ struct xfs_scrub *sc = rs->sc;
+ unsigned int res = sc->tp->t_log_res - fixed_overhead;
+
+ /* Don't underflow the reservation */
+ if (sc->tp->t_log_res < (fixed_overhead + variable_overhead)) {
+ ASSERT(sc->tp->t_log_res >=
+ (fixed_overhead + variable_overhead));
+ xfs_force_shutdown(sc->mp, SHUTDOWN_CORRUPT_INCORE);
+ return;
+ }
+
+ rs->max_deferred = per_intent ? res / variable_overhead : 0;
+ res -= rs->max_deferred * per_intent;
+ rs->max_binval = per_binval ? res / per_binval : 0;
+}
+
+/*
+ * Compute the maximum number of intent items that reaping can attach to the
+ * scrub transaction given the worst case log overhead of the intent items
+ * needed to reap a single per-AG space extent. This is not for freeing CoW
+ * staging extents.
+ */
+STATIC void
+xreap_configure_agextent_limits(
+ struct xreap_state *rs)
+{
+ struct xfs_scrub *sc = rs->sc;
+ struct xfs_mount *mp = sc->mp;
+
+ /*
+ * In the worst case, relogging an intent item causes both an intent
+ * item and a done item to be attached to a transaction for each extent
+ * that we'd like to process.
+ */
+ const unsigned int efi = xfs_efi_log_space(1) +
+ xfs_efd_log_space(1);
+ const unsigned int rui = xfs_rui_log_space(1) +
+ xfs_rud_log_space();
+
+ /*
+ * Various things can happen when reaping non-CoW metadata blocks:
+ *
+ * t1: Unmapping crosslinked metadata blocks: deferred removal of rmap
+ * record.
+ *
+ * t3: Freeing to AGFL: roll and finish deferred items for every block.
+ * Limits here do not matter.
+ *
+ * t4: Freeing metadata blocks: deferred freeing of the space, which
+ * also removes the rmap record.
+ *
+ * For simplicity, we'll use the worst-case intents size to determine
+ * the maximum number of deferred extents before we have to finish the
+ * whole chain. If we're trying to reap a btree larger than this size,
+ * a crash midway through reaping can result in leaked blocks.
+ */
+ const unsigned int t1 = rui;
+ const unsigned int t4 = rui + efi;
+ const unsigned int per_intent = max(t1, t4);
+
+ /*
+ * For each transaction in a reap chain, we must be able to take one
+ * step in the defer item chain, which should only consist of EFI or
+ * RUI items.
+ */
+ const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1);
+ const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1);
+ const unsigned int step_size = max(f1, f2);
+
+ /* Largest buffer size (in fsblocks) that can be invalidated. */
+ const unsigned int max_binval = xrep_binval_max_fsblocks(mp);
+
+ /* Maximum overhead of invalidating one buffer. */
+ const unsigned int per_binval =
+ xfs_buf_inval_log_space(1, XFS_B_TO_FSBT(mp, max_binval));
+
+ /*
+ * For each transaction in a reap chain, we can delete some number of
+ * extents and invalidate some number of blocks. We assume that btree
+ * blocks aren't usually contiguous; and that scrub likely pulled all
+ * the buffers into memory. From these assumptions, set the maximum
+ * number of deferrals we can queue before flushing the defer chain,
+ * and the number of invalidations we can queue before rolling to a
+ * clean transaction (and possibly relogging some of the deferrals) to
+ * the same quantity.
+ */
+ const unsigned int variable_overhead = per_intent + per_binval;
+
+ xreap_configure_limits(rs, step_size, variable_overhead, per_intent,
+ per_binval);
+
+ trace_xreap_agextent_limits(sc->tp, per_binval, rs->max_binval,
+ step_size, per_intent, rs->max_deferred);
+}
+
+/*
+ * Compute the maximum number of intent items that reaping can attach to the
+ * scrub transaction given the worst case log overhead of the intent items
+ * needed to reap a single CoW staging extent. This is not for freeing
+ * metadata blocks.
+ */
+STATIC void
+xreap_configure_agcow_limits(
+ struct xreap_state *rs)
+{
+ struct xfs_scrub *sc = rs->sc;
+ struct xfs_mount *mp = sc->mp;
+
+ /*
+ * In the worst case, relogging an intent item causes both an intent
+ * item and a done item to be attached to a transaction for each extent
+ * that we'd like to process.
+ */
+ const unsigned int efi = xfs_efi_log_space(1) +
+ xfs_efd_log_space(1);
+ const unsigned int rui = xfs_rui_log_space(1) +
+ xfs_rud_log_space();
+ const unsigned int cui = xfs_cui_log_space(1) +
+ xfs_cud_log_space();
+
+ /*
+ * Various things can happen when reaping CoW staging extents:
+ *
+ * t0: Unmapping crosslinked CoW blocks: deferred removal of refcount
+ * record, which defers removal of rmap record
+ *
+ * t2: Freeing CoW blocks: deferred removal of refcount record, which
+ * defers removal of rmap record; and deferred removal of the space
+ *
+ * For simplicity, we'll use the worst-case intents size to determine
+ * the maximum number of deferred extents before we have to finish the
+ * whole chain. If we're trying to reap a btree larger than this size,
+ * a crash midway through reaping can result in leaked blocks.
+ */
+ const unsigned int t0 = cui + rui;
+ const unsigned int t2 = cui + rui + efi;
+ const unsigned int per_intent = max(t0, t2);
+
+ /*
+ * For each transaction in a reap chain, we must be able to take one
+ * step in the defer item chain, which should only consist of CUI, EFI,
+ * or RUI items.
+ */
+ const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1);
+ const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1);
+ const unsigned int f3 = xfs_calc_finish_cui_reservation(mp, 1);
+ const unsigned int step_size = max3(f1, f2, f3);
+
+ /* Largest buffer size (in fsblocks) that can be invalidated. */
+ const unsigned int max_binval = xrep_binval_max_fsblocks(mp);
+
+ /* Overhead of invalidating one buffer */
+ const unsigned int per_binval =
+ xfs_buf_inval_log_space(1, XFS_B_TO_FSBT(mp, max_binval));
+
+ /*
+ * For each transaction in a reap chain, we can delete some number of
+ * extents and invalidate some number of blocks. We assume that CoW
+ * staging extents are usually more than 1 fsblock, and that there
+ * shouldn't be any buffers for those blocks. From the assumptions,
+ * set the number of deferrals to use as much of the reservation as
+ * it can, but leave space to invalidate 1/8th that number of buffers.
+ */
+ const unsigned int variable_overhead = per_intent +
+ (per_binval / 8);
+
+ xreap_configure_limits(rs, step_size, variable_overhead, per_intent,
+ per_binval);
+
+ trace_xreap_agcow_limits(sc->tp, per_binval, rs->max_binval, step_size,
+ per_intent, rs->max_deferred);
+}
+
/*
* Break an AG metadata extent into sub-extents by fate (crosslinked, not
* crosslinked), and dispose of each sub-extent separately.
@@ -531,11 +736,11 @@ xreap_agmeta_extent(
if (error)
return error;
xreap_defer_finish_reset(rs);
- } else if (xreap_want_roll(rs)) {
+ } else if (xreap_want_binval_roll(rs)) {
error = xrep_roll_ag_trans(sc);
if (error)
return error;
- xreap_reset(rs);
+ xreap_binval_reset(rs);
}
agbno += aglen;
@@ -562,11 +767,12 @@ xrep_reap_agblocks(
ASSERT(xfs_has_rmapbt(sc->mp));
ASSERT(sc->ip == NULL);
+ xreap_configure_agextent_limits(&rs);
error = xagb_bitmap_walk(bitmap, xreap_agmeta_extent, &rs);
if (error)
return error;
- if (xreap_dirty(&rs))
+ if (xreap_is_dirty(&rs))
return xrep_defer_finish(sc);
return 0;
@@ -628,7 +834,7 @@ xreap_fsmeta_extent(
if (error)
goto out_agf;
xreap_defer_finish_reset(rs);
- } else if (xreap_want_roll(rs)) {
+ } else if (xreap_want_binval_roll(rs)) {
/*
* Hold the AGF buffer across the transaction roll so
* that we don't have to reattach it to the scrub
@@ -639,7 +845,7 @@ xreap_fsmeta_extent(
xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
if (error)
goto out_agf;
- xreap_reset(rs);
+ xreap_binval_reset(rs);
}
agbno += aglen;
@@ -674,11 +880,15 @@ xrep_reap_fsblocks(
ASSERT(xfs_has_rmapbt(sc->mp));
ASSERT(sc->ip != NULL);
+ if (oinfo == &XFS_RMAP_OINFO_COW)
+ xreap_configure_agcow_limits(&rs);
+ else
+ xreap_configure_agextent_limits(&rs);
error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
if (error)
return error;
- if (xreap_dirty(&rs))
+ if (xreap_is_dirty(&rs))
return xrep_defer_finish(sc);
return 0;
@@ -770,7 +980,7 @@ xreap_rgextent_iter(
rtbno = xfs_rgbno_to_rtb(sc->sr.rtg, rgbno);
/*
- * If there are other rmappings, this block is cross linked and must
+ * t1: There are other rmappings; this block is cross linked and must
* not be freed. Remove the forward and reverse mapping and move on.
*/
if (crosslinked) {
@@ -778,14 +988,14 @@ xreap_rgextent_iter(
*rglenp);
xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp);
- rs->deferred++;
+ xreap_inc_defer(rs);
return 0;
}
trace_xreap_dispose_free_extent(rtg_group(sc->sr.rtg), rgbno, *rglenp);
/*
- * The CoW staging extent is not crosslinked. Use deferred work items
+ * t2: The CoW staging extent is not crosslinked. Use deferred work
* to remove the refcountbt records (which removes the rmap records)
* and free the extent. We're not worried about the system going down
* here because log recovery walks the refcount btree to clean out the
@@ -799,10 +1009,73 @@ xreap_rgextent_iter(
if (error)
return error;
- rs->deferred++;
+ xreap_inc_defer(rs);
return 0;
}
+/*
+ * Compute the maximum number of intent items that reaping can attach to the
+ * scrub transaction given the worst case log overhead of the intent items
+ * needed to reap a single CoW staging extent. This is not for freeing
+ * metadata blocks.
+ */
+STATIC void
+xreap_configure_rgcow_limits(
+ struct xreap_state *rs)
+{
+ struct xfs_scrub *sc = rs->sc;
+ struct xfs_mount *mp = sc->mp;
+
+ /*
+ * In the worst case, relogging an intent item causes both an intent
+ * item and a done item to be attached to a transaction for each extent
+ * that we'd like to process.
+ */
+ const unsigned int efi = xfs_efi_log_space(1) +
+ xfs_efd_log_space(1);
+ const unsigned int rui = xfs_rui_log_space(1) +
+ xfs_rud_log_space();
+ const unsigned int cui = xfs_cui_log_space(1) +
+ xfs_cud_log_space();
+
+ /*
+ * Various things can happen when reaping CoW staging extents:
+ *
+ * t1: Unmapping crosslinked CoW blocks: deferred removal of refcount
+ * record, which defers removal of rmap record
+ *
+ * t2: Freeing CoW blocks: deferred removal of refcount record, which
+ * defers removal of rmap record; and deferred removal of the space
+ *
+ * For simplicity, we'll use the worst-case intents size to determine
+ * the maximum number of deferred extents before we have to finish the
+ * whole chain. If we're trying to reap a btree larger than this size,
+ * a crash midway through reaping can result in leaked blocks.
+ */
+ const unsigned int t1 = cui + rui;
+ const unsigned int t2 = cui + rui + efi;
+ const unsigned int per_intent = max(t1, t2);
+
+ /*
+ * For each transaction in a reap chain, we must be able to take one
+ * step in the defer item chain, which should only consist of CUI, EFI,
+ * or RUI items.
+ */
+ const unsigned int f1 = xfs_calc_finish_rt_efi_reservation(mp, 1);
+ const unsigned int f2 = xfs_calc_finish_rt_rui_reservation(mp, 1);
+ const unsigned int f3 = xfs_calc_finish_rt_cui_reservation(mp, 1);
+ const unsigned int step_size = max3(f1, f2, f3);
+
+ /*
+ * The only buffer for the rt device is the rtgroup super, so we don't
+ * need to save space for buffer invalidations.
+ */
+ xreap_configure_limits(rs, step_size, per_intent, per_intent, 0);
+
+ trace_xreap_rgcow_limits(sc->tp, 0, 0, step_size, per_intent,
+ rs->max_deferred);
+}
+
#define XREAP_RTGLOCK_ALL (XFS_RTGLOCK_BITMAP | \
XFS_RTGLOCK_RMAP | \
XFS_RTGLOCK_REFCOUNT)
@@ -855,11 +1128,11 @@ xreap_rtmeta_extent(
if (error)
goto out_unlock;
xreap_defer_finish_reset(rs);
- } else if (xreap_want_roll(rs)) {
+ } else if (xreap_want_binval_roll(rs)) {
error = xfs_trans_roll_inode(&sc->tp, sc->ip);
if (error)
goto out_unlock;
- xreap_reset(rs);
+ xreap_binval_reset(rs);
}
rgbno += rglen;
@@ -891,12 +1164,14 @@ xrep_reap_rtblocks(
ASSERT(xfs_has_rmapbt(sc->mp));
ASSERT(sc->ip != NULL);
+ ASSERT(oinfo == &XFS_RMAP_OINFO_COW);
+ xreap_configure_rgcow_limits(&rs);
error = xrtb_bitmap_walk(bitmap, xreap_rtmeta_extent, &rs);
if (error)
return error;
- if (xreap_dirty(&rs))
+ if (xreap_is_dirty(&rs))
return xrep_defer_finish(sc);
return 0;
@@ -929,13 +1204,13 @@ xrep_reap_metadir_fsblocks(
ASSERT(sc->ip != NULL);
ASSERT(xfs_is_metadir_inode(sc->ip));
+ xreap_configure_agextent_limits(&rs);
xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);
-
error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
if (error)
return error;
- if (xreap_dirty(&rs)) {
+ if (xreap_is_dirty(&rs)) {
error = xrep_defer_finish(sc);
if (error)
return error;
@@ -955,13 +1230,12 @@ xrep_reap_metadir_fsblocks(
*/
STATIC int
xreap_bmapi_select(
- struct xfs_scrub *sc,
- struct xfs_inode *ip,
- int whichfork,
+ struct xreap_state *rs,
struct xfs_bmbt_irec *imap,
bool *crosslinked)
{
struct xfs_owner_info oinfo;
+ struct xfs_scrub *sc = rs->sc;
struct xfs_btree_cur *cur;
xfs_filblks_t len = 1;
xfs_agblock_t bno;
@@ -975,7 +1249,8 @@ xreap_bmapi_select(
cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
sc->sa.pag);
- xfs_rmap_ino_owner(&oinfo, ip->i_ino, whichfork, imap->br_startoff);
+ xfs_rmap_ino_owner(&oinfo, rs->ip->i_ino, rs->whichfork,
+ imap->br_startoff);
error = xfs_rmap_has_other_keys(cur, agbno, 1, &oinfo, crosslinked);
if (error)
goto out_cur;
@@ -1038,21 +1313,19 @@ xreap_buf_loggable(
*/
STATIC int
xreap_bmapi_binval(
- struct xfs_scrub *sc,
- struct xfs_inode *ip,
- int whichfork,
+ struct xreap_state *rs,
struct xfs_bmbt_irec *imap)
{
+ struct xfs_scrub *sc = rs->sc;
struct xfs_mount *mp = sc->mp;
struct xfs_perag *pag = sc->sa.pag;
- int bmap_flags = xfs_bmapi_aflag(whichfork);
+ int bmap_flags = xfs_bmapi_aflag(rs->whichfork);
xfs_fileoff_t off;
xfs_fileoff_t max_off;
xfs_extlen_t scan_blocks;
xfs_agblock_t bno;
xfs_agblock_t agbno;
xfs_agblock_t agbno_next;
- unsigned int invalidated = 0;
int error;
/*
@@ -1079,7 +1352,7 @@ xreap_bmapi_binval(
struct xfs_bmbt_irec hmap;
int nhmaps = 1;
- error = xfs_bmapi_read(ip, off, max_off - off, &hmap,
+ error = xfs_bmapi_read(rs->ip, off, max_off - off, &hmap,
&nhmaps, bmap_flags);
if (error)
return error;
@@ -1120,14 +1393,13 @@ xreap_bmapi_binval(
xfs_buf_stale(bp);
xfs_buf_relse(bp);
}
- invalidated++;
/*
* Stop invalidating if we've hit the limit; we should
* still have enough reservation left to free however
- * much of the mapping we've seen so far.
+ * far we've gotten.
*/
- if (invalidated > XREAP_MAX_BINVAL) {
+ if (!xreap_inc_binval(rs)) {
imap->br_blockcount = agbno_next - bno;
goto out;
}
@@ -1149,12 +1421,11 @@ out:
*/
STATIC int
xrep_reap_bmapi_iter(
- struct xfs_scrub *sc,
- struct xfs_inode *ip,
- int whichfork,
+ struct xreap_state *rs,
struct xfs_bmbt_irec *imap,
bool crosslinked)
{
+ struct xfs_scrub *sc = rs->sc;
int error;
if (crosslinked) {
@@ -1171,14 +1442,14 @@ xrep_reap_bmapi_iter(
imap->br_blockcount);
/*
- * Schedule removal of the mapping from the fork. We use
+ * t0: Schedule removal of the mapping from the fork. We use
* deferred log intents in this function to control the exact
* sequence of metadata updates.
*/
- xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
- xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
+ xfs_bmap_unmap_extent(sc->tp, rs->ip, rs->whichfork, imap);
+ xfs_trans_mod_dquot_byino(sc->tp, rs->ip, XFS_TRANS_DQ_BCOUNT,
-(int64_t)imap->br_blockcount);
- xfs_rmap_unmap_extent(sc->tp, ip, whichfork, imap);
+ xfs_rmap_unmap_extent(sc->tp, rs->ip, rs->whichfork, imap);
return 0;
}
@@ -1199,41 +1470,139 @@ xrep_reap_bmapi_iter(
* transaction is full of logged buffer invalidations, so we need to
* return early so that we can roll and retry.
*/
- error = xreap_bmapi_binval(sc, ip, whichfork, imap);
+ error = xreap_bmapi_binval(rs, imap);
if (error || imap->br_blockcount == 0)
return error;
/*
- * Schedule removal of the mapping from the fork. We use deferred log
- * intents in this function to control the exact sequence of metadata
+ * t1: Schedule removal of the mapping from the fork. We use deferred
+ * work in this function to control the exact sequence of metadata
* updates.
*/
- xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
- xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
+ xfs_bmap_unmap_extent(sc->tp, rs->ip, rs->whichfork, imap);
+ xfs_trans_mod_dquot_byino(sc->tp, rs->ip, XFS_TRANS_DQ_BCOUNT,
-(int64_t)imap->br_blockcount);
return xfs_free_extent_later(sc->tp, imap->br_startblock,
imap->br_blockcount, NULL, XFS_AG_RESV_NONE,
XFS_FREE_EXTENT_SKIP_DISCARD);
}
+/* Compute the maximum mapcount of a file buffer. */
+static unsigned int
+xreap_bmapi_binval_mapcount(
+ struct xfs_scrub *sc)
+{
+ /* directory blocks can span multiple fsblocks and be discontiguous */
+ if (sc->sm->sm_type == XFS_SCRUB_TYPE_DIR)
+ return sc->mp->m_dir_geo->fsbcount;
+
+ /* all other file xattr/symlink blocks must be contiguous */
+ return 1;
+}
+
+/* Compute the maximum block size of a file buffer. */
+static unsigned int
+xreap_bmapi_binval_blocksize(
+ struct xfs_scrub *sc)
+{
+ switch (sc->sm->sm_type) {
+ case XFS_SCRUB_TYPE_DIR:
+ return sc->mp->m_dir_geo->blksize;
+ case XFS_SCRUB_TYPE_XATTR:
+ case XFS_SCRUB_TYPE_PARENT:
+ /*
+ * The xattr structure itself consists of single fsblocks, but
+ * there could be remote xattr blocks to invalidate.
+ */
+ return XFS_XATTR_SIZE_MAX;
+ }
+
+ /* everything else is a single block */
+ return sc->mp->m_sb.sb_blocksize;
+}
+
+/*
+ * Compute the maximum number of buffer invalidations that we can do while
+ * reaping a single extent from a file fork.
+ */
+STATIC void
+xreap_configure_bmapi_limits(
+ struct xreap_state *rs)
+{
+ struct xfs_scrub *sc = rs->sc;
+ struct xfs_mount *mp = sc->mp;
+
+ /* overhead of invalidating a buffer */
+ const unsigned int per_binval =
+ xfs_buf_inval_log_space(xreap_bmapi_binval_mapcount(sc),
+ xreap_bmapi_binval_blocksize(sc));
+
+ /*
+ * In the worst case, relogging an intent item causes both an intent
+ * item and a done item to be attached to a transaction for each extent
+ * that we'd like to process.
+ */
+ const unsigned int efi = xfs_efi_log_space(1) +
+ xfs_efd_log_space(1);
+ const unsigned int rui = xfs_rui_log_space(1) +
+ xfs_rud_log_space();
+ const unsigned int bui = xfs_bui_log_space(1) +
+ xfs_bud_log_space();
+
+ /*
+ * t1: Unmapping crosslinked file data blocks: one bmap deletion,
+ * possibly an EFI for underfilled bmbt blocks, and an rmap deletion.
+ *
+ * t2: Freeing file data blocks: one bmap deletion, possibly an
+ * EFI for underfilled bmbt blocks, and another EFI for the space
+ * itself.
+ */
+ const unsigned int t1 = (bui + efi) + rui;
+ const unsigned int t2 = (bui + efi) + efi;
+ const unsigned int per_intent = max(t1, t2);
+
+ /*
+ * For each transaction in a reap chain, we must be able to take one
+ * step in the defer item chain, which should only consist of BUI, EFI,
+ * or RUI items.
+ */
+ const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1);
+ const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1);
+ const unsigned int f3 = xfs_calc_finish_bui_reservation(mp, 1);
+ const unsigned int step_size = max3(f1, f2, f3);
+
+ /*
+ * Each call to xreap_ifork_extent starts with a clean transaction and
+ * operates on a single mapping by creating a chain of log intent items
+ * for that mapping. We need to leave enough reservation in the
+ * transaction to log btree buffer and inode updates for each step in
+ * the chain, and to relog the log intents.
+ */
+ const unsigned int per_extent_res = per_intent + step_size;
+
+ xreap_configure_limits(rs, per_extent_res, per_binval, 0, per_binval);
+
+ trace_xreap_bmapi_limits(sc->tp, per_binval, rs->max_binval,
+ step_size, per_intent, 1);
+}
+
/*
* Dispose of as much of this file extent as we can. Upon successful return,
* the imap will reflect the mapping that was removed from the fork.
*/
STATIC int
xreap_ifork_extent(
- struct xfs_scrub *sc,
- struct xfs_inode *ip,
- int whichfork,
+ struct xreap_state *rs,
struct xfs_bmbt_irec *imap)
{
+ struct xfs_scrub *sc = rs->sc;
xfs_agnumber_t agno;
bool crosslinked;
int error;
ASSERT(sc->sa.pag == NULL);
- trace_xreap_ifork_extent(sc, ip, whichfork, imap);
+ trace_xreap_ifork_extent(sc, rs->ip, rs->whichfork, imap);
agno = XFS_FSB_TO_AGNO(sc->mp, imap->br_startblock);
sc->sa.pag = xfs_perag_get(sc->mp, agno);
@@ -1248,11 +1617,11 @@ xreap_ifork_extent(
* Decide the fate of the blocks at the beginning of the mapping, then
* update the mapping to use it with the unmap calls.
*/
- error = xreap_bmapi_select(sc, ip, whichfork, imap, &crosslinked);
+ error = xreap_bmapi_select(rs, imap, &crosslinked);
if (error)
goto out_agf;
- error = xrep_reap_bmapi_iter(sc, ip, whichfork, imap, crosslinked);
+ error = xrep_reap_bmapi_iter(rs, imap, crosslinked);
if (error)
goto out_agf;
@@ -1276,6 +1645,11 @@ xrep_reap_ifork(
struct xfs_inode *ip,
int whichfork)
{
+ struct xreap_state rs = {
+ .sc = sc,
+ .ip = ip,
+ .whichfork = whichfork,
+ };
xfs_fileoff_t off = 0;
int bmap_flags = xfs_bmapi_aflag(whichfork);
int error;
@@ -1284,6 +1658,7 @@ xrep_reap_ifork(
ASSERT(ip == sc->ip || ip == sc->tempip);
ASSERT(whichfork == XFS_ATTR_FORK || !XFS_IS_REALTIME_INODE(ip));
+ xreap_configure_bmapi_limits(&rs);
while (off < XFS_MAX_FILEOFF) {
struct xfs_bmbt_irec imap;
int nimaps = 1;
@@ -1303,13 +1678,14 @@ xrep_reap_ifork(
* can in a single transaction.
*/
if (xfs_bmap_is_real_extent(&imap)) {
- error = xreap_ifork_extent(sc, ip, whichfork, &imap);
+ error = xreap_ifork_extent(&rs, &imap);
if (error)
return error;
error = xfs_defer_finish(&sc->tp);
if (error)
return error;
+ xreap_defer_finish_reset(&rs);
}
off = imap.br_startoff + imap.br_blockcount;
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index d00c18954a26..efd5a7ccdf62 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -1110,7 +1110,7 @@ xrep_will_attempt(
return true;
/* Let debug users force us into the repair routines. */
- if (XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
+ if (XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
return true;
/* Metadata is corrupt or failed cross-referencing. */
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 9c04295742c8..2bb125c4f9bf 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -18,14 +18,6 @@ static inline int xrep_notsupported(struct xfs_scrub *sc)
#ifdef CONFIG_XFS_ONLINE_REPAIR
-/*
- * This is the maximum number of deferred extent freeing item extents (EFIs)
- * that we'll attach to a transaction without rolling the transaction to avoid
- * overrunning a tr_itruncate reservation.
- */
-#define XREP_MAX_ITRUNCATE_EFIS (128)
-
-
/* Repair helpers */
int xrep_attempt(struct xfs_scrub *sc, struct xchk_stats_run *run);
diff --git a/fs/xfs/scrub/symlink_repair.c b/fs/xfs/scrub/symlink_repair.c
index 953ce7be78dc..5902398185a8 100644
--- a/fs/xfs/scrub/symlink_repair.c
+++ b/fs/xfs/scrub/symlink_repair.c
@@ -185,7 +185,7 @@ xrep_symlink_salvage_inline(
return 0;
nr = min(XFS_SYMLINK_MAXLEN, xfs_inode_data_fork_size(ip));
- strncpy(target_buf, ifp->if_data, nr);
+ memcpy(target_buf, ifp->if_data, nr);
return nr;
}
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 2450e214103f..987313a52e64 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -22,6 +22,7 @@
#include "xfs_parent.h"
#include "xfs_metafile.h"
#include "xfs_rtgroup.h"
+#include "xfs_trans.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index a8187281eb96..39ea651cbb75 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -2000,6 +2000,51 @@ DEFINE_REPAIR_EXTENT_EVENT(xreap_agextent_binval);
DEFINE_REPAIR_EXTENT_EVENT(xreap_bmapi_binval);
DEFINE_REPAIR_EXTENT_EVENT(xrep_agfl_insert);
+DECLARE_EVENT_CLASS(xrep_reap_limits_class,
+ TP_PROTO(const struct xfs_trans *tp, unsigned int per_binval,
+ unsigned int max_binval, unsigned int step_size,
+ unsigned int per_intent,
+ unsigned int max_deferred),
+ TP_ARGS(tp, per_binval, max_binval, step_size, per_intent, max_deferred),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, log_res)
+ __field(unsigned int, per_binval)
+ __field(unsigned int, max_binval)
+ __field(unsigned int, step_size)
+ __field(unsigned int, per_intent)
+ __field(unsigned int, max_deferred)
+ ),
+ TP_fast_assign(
+ __entry->dev = tp->t_mountp->m_super->s_dev;
+ __entry->log_res = tp->t_log_res;
+ __entry->per_binval = per_binval;
+ __entry->max_binval = max_binval;
+ __entry->step_size = step_size;
+ __entry->per_intent = per_intent;
+ __entry->max_deferred = max_deferred;
+ ),
+ TP_printk("dev %d:%d logres %u per_binval %u max_binval %u step_size %u per_intent %u max_deferred %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->log_res,
+ __entry->per_binval,
+ __entry->max_binval,
+ __entry->step_size,
+ __entry->per_intent,
+ __entry->max_deferred)
+);
+#define DEFINE_REPAIR_REAP_LIMITS_EVENT(name) \
+DEFINE_EVENT(xrep_reap_limits_class, name, \
+ TP_PROTO(const struct xfs_trans *tp, unsigned int per_binval, \
+ unsigned int max_binval, unsigned int step_size, \
+ unsigned int per_intent, \
+ unsigned int max_deferred), \
+ TP_ARGS(tp, per_binval, max_binval, step_size, per_intent, max_deferred))
+DEFINE_REPAIR_REAP_LIMITS_EVENT(xreap_agextent_limits);
+DEFINE_REPAIR_REAP_LIMITS_EVENT(xreap_agcow_limits);
+DEFINE_REPAIR_REAP_LIMITS_EVENT(xreap_rgcow_limits);
+DEFINE_REPAIR_REAP_LIMITS_EVENT(xreap_bmapi_limits);
+
DECLARE_EVENT_CLASS(xrep_reap_find_class,
TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno,
xfs_extlen_t len, bool crosslinked),
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index 5eef3bc30bda..c3a593319bee 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -491,7 +491,7 @@ xfs_attr_finish_item(
/* Reset trans after EAGAIN cycle since the transaction is new */
args->trans = tp;
- if (XFS_TEST_ERROR(false, args->dp->i_mount, XFS_ERRTAG_LARP)) {
+ if (XFS_TEST_ERROR(args->dp->i_mount, XFS_ERRTAG_LARP)) {
error = -EIO;
goto out;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index f9ef3b2a332a..773d959965dc 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -387,8 +387,6 @@ xfs_buf_map_verify(
struct xfs_buftarg *btp,
struct xfs_buf_map *map)
{
- xfs_daddr_t eofs;
-
/* Check for IOs smaller than the sector size / not sector aligned */
ASSERT(!(BBTOB(map->bm_len) < btp->bt_meta_sectorsize));
ASSERT(!(BBTOB(map->bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));
@@ -397,11 +395,10 @@ xfs_buf_map_verify(
* Corrupted block numbers can get through to here, unfortunately, so we
* have to check that the buffer falls within the filesystem bounds.
*/
- eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
- if (map->bm_bn < 0 || map->bm_bn >= eofs) {
+ if (map->bm_bn < 0 || map->bm_bn >= btp->bt_nr_sectors) {
xfs_alert(btp->bt_mount,
"%s: daddr 0x%llx out of range, EOFS 0x%llx",
- __func__, map->bm_bn, eofs);
+ __func__, map->bm_bn, btp->bt_nr_sectors);
WARN_ON(1);
return -EFSCORRUPTED;
}
@@ -1299,7 +1296,7 @@ xfs_buf_bio_end_io(
if (bio->bi_status)
xfs_buf_ioerror(bp, blk_status_to_errno(bio->bi_status));
else if ((bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
- XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
+ XFS_TEST_ERROR(bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
xfs_buf_ioerror(bp, -EIO);
if (bp->b_flags & XBF_ASYNC) {
@@ -1720,26 +1717,30 @@ xfs_configure_buftarg_atomic_writes(
int
xfs_configure_buftarg(
struct xfs_buftarg *btp,
- unsigned int sectorsize)
+ unsigned int sectorsize,
+ xfs_rfsblock_t nr_blocks)
{
- int error;
+ struct xfs_mount *mp = btp->bt_mount;
- ASSERT(btp->bt_bdev != NULL);
+ if (btp->bt_bdev) {
+ int error;
- /* Set up metadata sector size info */
- btp->bt_meta_sectorsize = sectorsize;
- btp->bt_meta_sectormask = sectorsize - 1;
+ error = bdev_validate_blocksize(btp->bt_bdev, sectorsize);
+ if (error) {
+ xfs_warn(mp,
+ "Cannot use blocksize %u on device %pg, err %d",
+ sectorsize, btp->bt_bdev, error);
+ return -EINVAL;
+ }
- error = bdev_validate_blocksize(btp->bt_bdev, sectorsize);
- if (error) {
- xfs_warn(btp->bt_mount,
- "Cannot use blocksize %u on device %pg, err %d",
- sectorsize, btp->bt_bdev, error);
- return -EINVAL;
+ if (bdev_can_atomic_write(btp->bt_bdev))
+ xfs_configure_buftarg_atomic_writes(btp);
}
- if (bdev_can_atomic_write(btp->bt_bdev))
- xfs_configure_buftarg_atomic_writes(btp);
+ btp->bt_meta_sectorsize = sectorsize;
+ btp->bt_meta_sectormask = sectorsize - 1;
+ /* m_blkbb_log is not set up yet */
+ btp->bt_nr_sectors = nr_blocks << (mp->m_sb.sb_blocklog - BBSHIFT);
return 0;
}
@@ -1749,6 +1750,9 @@ xfs_init_buftarg(
size_t logical_sectorsize,
const char *descr)
{
+ /* The maximum size of the buftarg is only known once the sb is read. */
+ btp->bt_nr_sectors = (xfs_daddr_t)-1;
+
/* Set up device logical sector size mask */
btp->bt_logical_sectorsize = logical_sectorsize;
btp->bt_logical_sectormask = logical_sectorsize - 1;
@@ -2084,7 +2088,7 @@ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
* This allows userspace to disrupt buffer caching for debug/testing
* purposes.
*/
- if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
+ if (XFS_TEST_ERROR(bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
lru_ref = 0;
atomic_set(&bp->b_lru_ref, lru_ref);
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index b269e115d9ac..8fa7bdf59c91 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -103,6 +103,7 @@ struct xfs_buftarg {
size_t bt_meta_sectormask;
size_t bt_logical_sectorsize;
size_t bt_logical_sectormask;
+ xfs_daddr_t bt_nr_sectors;
/* LRU control structures */
struct shrinker *bt_shrinker;
@@ -372,7 +373,8 @@ struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp,
extern void xfs_free_buftarg(struct xfs_buftarg *);
extern void xfs_buftarg_wait(struct xfs_buftarg *);
extern void xfs_buftarg_drain(struct xfs_buftarg *);
-int xfs_configure_buftarg(struct xfs_buftarg *btp, unsigned int sectorsize);
+int xfs_configure_buftarg(struct xfs_buftarg *btp, unsigned int sectorsize,
+ xfs_fsblock_t nr_blocks);
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 5d58e2ae4972..e4c8af873632 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -736,6 +736,16 @@ xlog_recover_do_primary_sb_buffer(
*/
xfs_sb_from_disk(&mp->m_sb, dsb);
+ /*
+ * Grow can change the device size. Mirror that into the buftarg.
+ */
+ mp->m_ddev_targp->bt_nr_sectors =
+ XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+ if (mp->m_rtdev_targp && mp->m_rtdev_targp != mp->m_ddev_targp) {
+ mp->m_rtdev_targp->bt_nr_sectors =
+ XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
+ }
+
if (mp->m_sb.sb_agcount < orig_agcount) {
xfs_alert(mp, "Shrinking AG count in log recovery not supported");
return -EFSCORRUPTED;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index dbd87e137694..39830b252ac8 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -10,61 +10,17 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_sysfs.h"
#include "xfs_inode.h"
#ifdef DEBUG
-static unsigned int xfs_errortag_random_default[] = {
- XFS_RANDOM_DEFAULT,
- XFS_RANDOM_IFLUSH_1,
- XFS_RANDOM_IFLUSH_2,
- XFS_RANDOM_IFLUSH_3,
- XFS_RANDOM_IFLUSH_4,
- XFS_RANDOM_IFLUSH_5,
- XFS_RANDOM_IFLUSH_6,
- XFS_RANDOM_DA_READ_BUF,
- XFS_RANDOM_BTREE_CHECK_LBLOCK,
- XFS_RANDOM_BTREE_CHECK_SBLOCK,
- XFS_RANDOM_ALLOC_READ_AGF,
- XFS_RANDOM_IALLOC_READ_AGI,
- XFS_RANDOM_ITOBP_INOTOBP,
- XFS_RANDOM_IUNLINK,
- XFS_RANDOM_IUNLINK_REMOVE,
- XFS_RANDOM_DIR_INO_VALIDATE,
- XFS_RANDOM_BULKSTAT_READ_CHUNK,
- XFS_RANDOM_IODONE_IOERR,
- XFS_RANDOM_STRATREAD_IOERR,
- XFS_RANDOM_STRATCMPL_IOERR,
- XFS_RANDOM_DIOWRITE_IOERR,
- XFS_RANDOM_BMAPIFORMAT,
- XFS_RANDOM_FREE_EXTENT,
- XFS_RANDOM_RMAP_FINISH_ONE,
- XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE,
- XFS_RANDOM_REFCOUNT_FINISH_ONE,
- XFS_RANDOM_BMAP_FINISH_ONE,
- XFS_RANDOM_AG_RESV_CRITICAL,
- 0, /* XFS_RANDOM_DROP_WRITES has been removed */
- XFS_RANDOM_LOG_BAD_CRC,
- XFS_RANDOM_LOG_ITEM_PIN,
- XFS_RANDOM_BUF_LRU_REF,
- XFS_RANDOM_FORCE_SCRUB_REPAIR,
- XFS_RANDOM_FORCE_SUMMARY_RECALC,
- XFS_RANDOM_IUNLINK_FALLBACK,
- XFS_RANDOM_BUF_IOERROR,
- XFS_RANDOM_REDUCE_MAX_IEXTENTS,
- XFS_RANDOM_BMAP_ALLOC_MINLEN_EXTENT,
- XFS_RANDOM_AG_RESV_FAIL,
- XFS_RANDOM_LARP,
- XFS_RANDOM_DA_LEAF_SPLIT,
- XFS_RANDOM_ATTR_LEAF_TO_NODE,
- XFS_RANDOM_WB_DELAY_MS,
- XFS_RANDOM_WRITE_DELAY_MS,
- XFS_RANDOM_EXCHMAPS_FINISH_ONE,
- XFS_RANDOM_METAFILE_RESV_CRITICAL,
-};
+#define XFS_ERRTAG(_tag, _name, _default) \
+ [XFS_ERRTAG_##_tag] = (_default),
+#include "xfs_errortag.h"
+static const unsigned int xfs_errortag_random_default[] = { XFS_ERRTAGS };
+#undef XFS_ERRTAG
struct xfs_errortag_attr {
struct attribute attr;
@@ -93,21 +49,18 @@ xfs_errortag_attr_store(
size_t count)
{
struct xfs_mount *mp = to_mp(kobject);
- struct xfs_errortag_attr *xfs_attr = to_attr(attr);
+ unsigned int error_tag = to_attr(attr)->tag;
int ret;
- unsigned int val;
if (strcmp(buf, "default") == 0) {
- val = xfs_errortag_random_default[xfs_attr->tag];
+ mp->m_errortag[error_tag] =
+ xfs_errortag_random_default[error_tag];
} else {
- ret = kstrtouint(buf, 0, &val);
+ ret = kstrtouint(buf, 0, &mp->m_errortag[error_tag]);
if (ret)
return ret;
}
- ret = xfs_errortag_set(mp, xfs_attr->tag, val);
- if (ret)
- return ret;
return count;
}
@@ -118,10 +71,9 @@ xfs_errortag_attr_show(
char *buf)
{
struct xfs_mount *mp = to_mp(kobject);
- struct xfs_errortag_attr *xfs_attr = to_attr(attr);
+ unsigned int error_tag = to_attr(attr)->tag;
- return snprintf(buf, PAGE_SIZE, "%u\n",
- xfs_errortag_get(mp, xfs_attr->tag));
+ return snprintf(buf, PAGE_SIZE, "%u\n", mp->m_errortag[error_tag]);
}
static const struct sysfs_ops xfs_errortag_sysfs_ops = {
@@ -129,110 +81,28 @@ static const struct sysfs_ops xfs_errortag_sysfs_ops = {
.store = xfs_errortag_attr_store,
};
-#define XFS_ERRORTAG_ATTR_RW(_name, _tag) \
+#define XFS_ERRTAG(_tag, _name, _default) \
static struct xfs_errortag_attr xfs_errortag_attr_##_name = { \
.attr = {.name = __stringify(_name), \
.mode = VERIFY_OCTAL_PERMISSIONS(S_IWUSR | S_IRUGO) }, \
- .tag = (_tag), \
-}
-
-#define XFS_ERRORTAG_ATTR_LIST(_name) &xfs_errortag_attr_##_name.attr
-
-XFS_ERRORTAG_ATTR_RW(noerror, XFS_ERRTAG_NOERROR);
-XFS_ERRORTAG_ATTR_RW(iflush1, XFS_ERRTAG_IFLUSH_1);
-XFS_ERRORTAG_ATTR_RW(iflush2, XFS_ERRTAG_IFLUSH_2);
-XFS_ERRORTAG_ATTR_RW(iflush3, XFS_ERRTAG_IFLUSH_3);
-XFS_ERRORTAG_ATTR_RW(iflush4, XFS_ERRTAG_IFLUSH_4);
-XFS_ERRORTAG_ATTR_RW(iflush5, XFS_ERRTAG_IFLUSH_5);
-XFS_ERRORTAG_ATTR_RW(iflush6, XFS_ERRTAG_IFLUSH_6);
-XFS_ERRORTAG_ATTR_RW(dareadbuf, XFS_ERRTAG_DA_READ_BUF);
-XFS_ERRORTAG_ATTR_RW(btree_chk_lblk, XFS_ERRTAG_BTREE_CHECK_LBLOCK);
-XFS_ERRORTAG_ATTR_RW(btree_chk_sblk, XFS_ERRTAG_BTREE_CHECK_SBLOCK);
-XFS_ERRORTAG_ATTR_RW(readagf, XFS_ERRTAG_ALLOC_READ_AGF);
-XFS_ERRORTAG_ATTR_RW(readagi, XFS_ERRTAG_IALLOC_READ_AGI);
-XFS_ERRORTAG_ATTR_RW(itobp, XFS_ERRTAG_ITOBP_INOTOBP);
-XFS_ERRORTAG_ATTR_RW(iunlink, XFS_ERRTAG_IUNLINK);
-XFS_ERRORTAG_ATTR_RW(iunlinkrm, XFS_ERRTAG_IUNLINK_REMOVE);
-XFS_ERRORTAG_ATTR_RW(dirinovalid, XFS_ERRTAG_DIR_INO_VALIDATE);
-XFS_ERRORTAG_ATTR_RW(bulkstat, XFS_ERRTAG_BULKSTAT_READ_CHUNK);
-XFS_ERRORTAG_ATTR_RW(logiodone, XFS_ERRTAG_IODONE_IOERR);
-XFS_ERRORTAG_ATTR_RW(stratread, XFS_ERRTAG_STRATREAD_IOERR);
-XFS_ERRORTAG_ATTR_RW(stratcmpl, XFS_ERRTAG_STRATCMPL_IOERR);
-XFS_ERRORTAG_ATTR_RW(diowrite, XFS_ERRTAG_DIOWRITE_IOERR);
-XFS_ERRORTAG_ATTR_RW(bmapifmt, XFS_ERRTAG_BMAPIFORMAT);
-XFS_ERRORTAG_ATTR_RW(free_extent, XFS_ERRTAG_FREE_EXTENT);
-XFS_ERRORTAG_ATTR_RW(rmap_finish_one, XFS_ERRTAG_RMAP_FINISH_ONE);
-XFS_ERRORTAG_ATTR_RW(refcount_continue_update, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE);
-XFS_ERRORTAG_ATTR_RW(refcount_finish_one, XFS_ERRTAG_REFCOUNT_FINISH_ONE);
-XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE);
-XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL);
-XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC);
-XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN);
-XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF);
-XFS_ERRORTAG_ATTR_RW(force_repair, XFS_ERRTAG_FORCE_SCRUB_REPAIR);
-XFS_ERRORTAG_ATTR_RW(bad_summary, XFS_ERRTAG_FORCE_SUMMARY_RECALC);
-XFS_ERRORTAG_ATTR_RW(iunlink_fallback, XFS_ERRTAG_IUNLINK_FALLBACK);
-XFS_ERRORTAG_ATTR_RW(buf_ioerror, XFS_ERRTAG_BUF_IOERROR);
-XFS_ERRORTAG_ATTR_RW(reduce_max_iextents, XFS_ERRTAG_REDUCE_MAX_IEXTENTS);
-XFS_ERRORTAG_ATTR_RW(bmap_alloc_minlen_extent, XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT);
-XFS_ERRORTAG_ATTR_RW(ag_resv_fail, XFS_ERRTAG_AG_RESV_FAIL);
-XFS_ERRORTAG_ATTR_RW(larp, XFS_ERRTAG_LARP);
-XFS_ERRORTAG_ATTR_RW(da_leaf_split, XFS_ERRTAG_DA_LEAF_SPLIT);
-XFS_ERRORTAG_ATTR_RW(attr_leaf_to_node, XFS_ERRTAG_ATTR_LEAF_TO_NODE);
-XFS_ERRORTAG_ATTR_RW(wb_delay_ms, XFS_ERRTAG_WB_DELAY_MS);
-XFS_ERRORTAG_ATTR_RW(write_delay_ms, XFS_ERRTAG_WRITE_DELAY_MS);
-XFS_ERRORTAG_ATTR_RW(exchmaps_finish_one, XFS_ERRTAG_EXCHMAPS_FINISH_ONE);
-XFS_ERRORTAG_ATTR_RW(metafile_resv_crit, XFS_ERRTAG_METAFILE_RESV_CRITICAL);
+ .tag = XFS_ERRTAG_##_tag, \
+};
+#include "xfs_errortag.h"
+XFS_ERRTAGS
+#undef XFS_ERRTAG
+#define XFS_ERRTAG(_tag, _name, _default) \
+ &xfs_errortag_attr_##_name.attr,
+#include "xfs_errortag.h"
static struct attribute *xfs_errortag_attrs[] = {
- XFS_ERRORTAG_ATTR_LIST(noerror),
- XFS_ERRORTAG_ATTR_LIST(iflush1),
- XFS_ERRORTAG_ATTR_LIST(iflush2),
- XFS_ERRORTAG_ATTR_LIST(iflush3),
- XFS_ERRORTAG_ATTR_LIST(iflush4),
- XFS_ERRORTAG_ATTR_LIST(iflush5),
- XFS_ERRORTAG_ATTR_LIST(iflush6),
- XFS_ERRORTAG_ATTR_LIST(dareadbuf),
- XFS_ERRORTAG_ATTR_LIST(btree_chk_lblk),
- XFS_ERRORTAG_ATTR_LIST(btree_chk_sblk),
- XFS_ERRORTAG_ATTR_LIST(readagf),
- XFS_ERRORTAG_ATTR_LIST(readagi),
- XFS_ERRORTAG_ATTR_LIST(itobp),
- XFS_ERRORTAG_ATTR_LIST(iunlink),
- XFS_ERRORTAG_ATTR_LIST(iunlinkrm),
- XFS_ERRORTAG_ATTR_LIST(dirinovalid),
- XFS_ERRORTAG_ATTR_LIST(bulkstat),
- XFS_ERRORTAG_ATTR_LIST(logiodone),
- XFS_ERRORTAG_ATTR_LIST(stratread),
- XFS_ERRORTAG_ATTR_LIST(stratcmpl),
- XFS_ERRORTAG_ATTR_LIST(diowrite),
- XFS_ERRORTAG_ATTR_LIST(bmapifmt),
- XFS_ERRORTAG_ATTR_LIST(free_extent),
- XFS_ERRORTAG_ATTR_LIST(rmap_finish_one),
- XFS_ERRORTAG_ATTR_LIST(refcount_continue_update),
- XFS_ERRORTAG_ATTR_LIST(refcount_finish_one),
- XFS_ERRORTAG_ATTR_LIST(bmap_finish_one),
- XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
- XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
- XFS_ERRORTAG_ATTR_LIST(log_item_pin),
- XFS_ERRORTAG_ATTR_LIST(buf_lru_ref),
- XFS_ERRORTAG_ATTR_LIST(force_repair),
- XFS_ERRORTAG_ATTR_LIST(bad_summary),
- XFS_ERRORTAG_ATTR_LIST(iunlink_fallback),
- XFS_ERRORTAG_ATTR_LIST(buf_ioerror),
- XFS_ERRORTAG_ATTR_LIST(reduce_max_iextents),
- XFS_ERRORTAG_ATTR_LIST(bmap_alloc_minlen_extent),
- XFS_ERRORTAG_ATTR_LIST(ag_resv_fail),
- XFS_ERRORTAG_ATTR_LIST(larp),
- XFS_ERRORTAG_ATTR_LIST(da_leaf_split),
- XFS_ERRORTAG_ATTR_LIST(attr_leaf_to_node),
- XFS_ERRORTAG_ATTR_LIST(wb_delay_ms),
- XFS_ERRORTAG_ATTR_LIST(write_delay_ms),
- XFS_ERRORTAG_ATTR_LIST(exchmaps_finish_one),
- XFS_ERRORTAG_ATTR_LIST(metafile_resv_crit),
- NULL,
+ XFS_ERRTAGS
+ NULL
};
ATTRIBUTE_GROUPS(xfs_errortag);
+#undef XFS_ERRTAG
+
+/* -1 because XFS_ERRTAG_DROP_WRITES got removed, + 1 for NULL termination */
+static_assert(ARRAY_SIZE(xfs_errortag_attrs) == XFS_ERRTAG_MAX);
static const struct kobj_type xfs_errortag_ktype = {
.release = xfs_sysfs_release,
@@ -295,7 +165,6 @@ xfs_errortag_enabled(
bool
xfs_errortag_test(
struct xfs_mount *mp,
- const char *expression,
const char *file,
int line,
unsigned int error_tag)
@@ -321,36 +190,12 @@ xfs_errortag_test(
return false;
xfs_warn_ratelimited(mp,
-"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
- expression, file, line, mp->m_super->s_id);
+"Injecting error at file %s, line %d, on filesystem \"%s\"",
+ file, line, mp->m_super->s_id);
return true;
}
int
-xfs_errortag_get(
- struct xfs_mount *mp,
- unsigned int error_tag)
-{
- if (!xfs_errortag_valid(error_tag))
- return -EINVAL;
-
- return mp->m_errortag[error_tag];
-}
-
-int
-xfs_errortag_set(
- struct xfs_mount *mp,
- unsigned int error_tag,
- unsigned int tag_value)
-{
- if (!xfs_errortag_valid(error_tag))
- return -EINVAL;
-
- mp->m_errortag[error_tag] = tag_value;
- return 0;
-}
-
-int
xfs_errortag_add(
struct xfs_mount *mp,
unsigned int error_tag)
@@ -359,9 +204,8 @@ xfs_errortag_add(
if (!xfs_errortag_valid(error_tag))
return -EINVAL;
-
- return xfs_errortag_set(mp, error_tag,
- xfs_errortag_random_default[error_tag]);
+ mp->m_errortag[error_tag] = xfs_errortag_random_default[error_tag];
+ return 0;
}
int
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 0b9c5ba8a598..fe6a71bbe9cd 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -8,22 +8,17 @@
struct xfs_mount;
-extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
- const char *filename, int linenum,
- xfs_failaddr_t failaddr);
-extern void xfs_corruption_error(const char *tag, int level,
- struct xfs_mount *mp, const void *buf, size_t bufsize,
- const char *filename, int linenum,
- xfs_failaddr_t failaddr);
+void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
+ const char *filename, int linenum, xfs_failaddr_t failaddr);
+void xfs_corruption_error(const char *tag, int level, struct xfs_mount *mp,
+ const void *buf, size_t bufsize, const char *filename,
+ int linenum, xfs_failaddr_t failaddr);
void xfs_buf_corruption_error(struct xfs_buf *bp, xfs_failaddr_t fa);
-extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error,
- const char *name, const void *buf, size_t bufsz,
- xfs_failaddr_t failaddr);
-extern void xfs_verifier_error(struct xfs_buf *bp, int error,
- xfs_failaddr_t failaddr);
-extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error,
- const char *name, const void *buf, size_t bufsz,
- xfs_failaddr_t failaddr);
+void xfs_buf_verifier_error(struct xfs_buf *bp, int error, const char *name,
+ const void *buf, size_t bufsz, xfs_failaddr_t failaddr);
+void xfs_verifier_error(struct xfs_buf *bp, int error, xfs_failaddr_t failaddr);
+void xfs_inode_verifier_error(struct xfs_inode *ip, int error, const char *name,
+ const void *buf, size_t bufsz, xfs_failaddr_t failaddr);
#define XFS_ERROR_REPORT(e, lvl, mp) \
xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
@@ -39,12 +34,12 @@ extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error,
#define XFS_CORRUPTION_DUMP_LEN (128)
#ifdef DEBUG
-extern int xfs_errortag_init(struct xfs_mount *mp);
-extern void xfs_errortag_del(struct xfs_mount *mp);
-extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression,
- const char *file, int line, unsigned int error_tag);
-#define XFS_TEST_ERROR(expr, mp, tag) \
- ((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag)))
+int xfs_errortag_init(struct xfs_mount *mp);
+void xfs_errortag_del(struct xfs_mount *mp);
+bool xfs_errortag_test(struct xfs_mount *mp, const char *file, int line,
+ unsigned int error_tag);
+#define XFS_TEST_ERROR(mp, tag) \
+ xfs_errortag_test((mp), __FILE__, __LINE__, (tag))
bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag);
#define XFS_ERRORTAG_DELAY(mp, tag) \
do { \
@@ -58,17 +53,13 @@ bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag);
mdelay((mp)->m_errortag[(tag)]); \
} while (0)
-extern int xfs_errortag_get(struct xfs_mount *mp, unsigned int error_tag);
-extern int xfs_errortag_set(struct xfs_mount *mp, unsigned int error_tag,
- unsigned int tag_value);
-extern int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag);
-extern int xfs_errortag_clearall(struct xfs_mount *mp);
+int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag);
+int xfs_errortag_clearall(struct xfs_mount *mp);
#else
#define xfs_errortag_init(mp) (0)
#define xfs_errortag_del(mp)
-#define XFS_TEST_ERROR(expr, mp, tag) (expr)
+#define XFS_TEST_ERROR(mp, tag) (false)
#define XFS_ERRORTAG_DELAY(mp, tag) ((void)0)
-#define xfs_errortag_set(mp, tag, val) (ENOSYS)
#define xfs_errortag_add(mp, tag) (ENOSYS)
#define xfs_errortag_clearall(mp) (ENOSYS)
#endif /* DEBUG */
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 47ee598a9827..418ddab590e0 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -202,7 +202,7 @@ xfs_efi_copy_format(
sizeof(struct xfs_extent));
return 0;
} else if (buf->iov_len == len32) {
- xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->iov_base;
+ struct xfs_efi_log_format_32 *src_efi_fmt_32 = buf->iov_base;
dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type;
dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size;
@@ -216,7 +216,7 @@ xfs_efi_copy_format(
}
return 0;
} else if (buf->iov_len == len64) {
- xfs_efi_log_format_64_t *src_efi_fmt_64 = buf->iov_base;
+ struct xfs_efi_log_format_64 *src_efi_fmt_64 = buf->iov_base;
dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type;
dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size;
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index c8402040410b..af1b0331f7af 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -49,7 +49,7 @@ struct xfs_efi_log_item {
struct xfs_log_item efi_item;
atomic_t efi_refcount;
atomic_t efi_next_extent;
- xfs_efi_log_format_t efi_format;
+ struct xfs_efi_log_format efi_format;
};
static inline size_t
@@ -69,7 +69,7 @@ struct xfs_efd_log_item {
struct xfs_log_item efd_item;
struct xfs_efi_log_item *efd_efip;
uint efd_next_extent;
- xfs_efd_log_format_t efd_format;
+ struct xfs_efd_log_format efd_format;
};
static inline size_t
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f96fbf5c54c9..2702fef2c90c 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -75,52 +75,47 @@ xfs_dir_fsync(
return xfs_log_force_inode(ip);
}
-static xfs_csn_t
-xfs_fsync_seq(
- struct xfs_inode *ip,
- bool datasync)
-{
- if (!xfs_ipincount(ip))
- return 0;
- if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
- return 0;
- return ip->i_itemp->ili_commit_seq;
-}
-
/*
- * All metadata updates are logged, which means that we just have to flush the
- * log up to the latest LSN that touched the inode.
+ * All metadata updates are logged, which means that we just have to push the
+ * journal to the required sequence number that holds the updates. We track
+ * datasync commits separately to full sync commits, and hence only need to
+ * select the correct sequence number for the log force here.
*
- * If we have concurrent fsync/fdatasync() calls, we need them to all block on
- * the log force before we clear the ili_fsync_fields field. This ensures that
- * we don't get a racing sync operation that does not wait for the metadata to
- * hit the journal before returning. If we race with clearing ili_fsync_fields,
- * then all that will happen is the log force will do nothing as the lsn will
- * already be on disk. We can't race with setting ili_fsync_fields because that
- * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
- * shared until after the ili_fsync_fields is cleared.
+ * We don't have to serialise against concurrent modifications, as we do not
+ * have to wait for modifications that have not yet completed. We define a
+ * transaction commit as completing when the commit sequence number is updated,
+ * hence if the sequence number has not updated, the sync operation has been
+ * run before the commit completed and we don't have to wait for it.
+ *
+ * If we have concurrent fsync/fdatasync() calls, the sequence numbers remain
+ * set on the log item until - at least - the journal flush completes. In
+ * reality, they are only cleared when the inode is fully unpinned (i.e.
+ * persistent in the journal and not dirty in the CIL), and so we rely on
+ * xfs_log_force_seq() either skipping sequences that have been persisted or
+ * waiting on sequences that are still in flight to correctly order concurrent
+ * sync operations.
*/
-static int
+static int
xfs_fsync_flush_log(
struct xfs_inode *ip,
bool datasync,
int *log_flushed)
{
- int error = 0;
- xfs_csn_t seq;
+ struct xfs_inode_log_item *iip = ip->i_itemp;
+ xfs_csn_t seq = 0;
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- seq = xfs_fsync_seq(ip, datasync);
- if (seq) {
- error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
- log_flushed);
+ spin_lock(&iip->ili_lock);
+ if (datasync)
+ seq = iip->ili_datasync_seq;
+ else
+ seq = iip->ili_commit_seq;
+ spin_unlock(&iip->ili_lock);
- spin_lock(&ip->i_itemp->ili_lock);
- ip->i_itemp->ili_fsync_fields = 0;
- spin_unlock(&ip->i_itemp->ili_lock);
- }
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- return error;
+ if (!seq)
+ return 0;
+
+ return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
+ log_flushed);
}
STATIC int
@@ -158,12 +153,10 @@ xfs_file_fsync(
error = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
/*
- * Any inode that has dirty modifications in the log is pinned. The
- * racy check here for a pinned inode will not catch modifications
- * that happen concurrently to the fsync call, but fsync semantics
- * only require to sync previously completed I/O.
+ * If the inode has an inode log item attached, it may need the journal
+ * flushed to persist any changes the log item might be tracking.
*/
- if (xfs_ipincount(ip)) {
+ if (ip->i_itemp) {
err2 = xfs_fsync_flush_log(ip, datasync, &log_flushed);
if (err2 && !error)
error = err2;
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index f6f628c01feb..566fd663c95b 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -14,8 +14,6 @@
*/
xfs_param_t xfs_params = {
/* MIN DFLT MAX */
- .sgid_inherit = { 0, 0, 1 },
- .symlink_mode = { 0, 0, 1 },
.panic_mask = { 0, 0, XFS_PTAG_MASK},
.error_level = { 0, 3, 11 },
.syncd_timer = { 1*100, 30*100, 7200*100},
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 4cf7abe50143..e44040206851 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -646,8 +646,7 @@ xfs_iget_cache_miss(
goto out_destroy;
/*
- * For version 5 superblocks, if we are initialising a new inode and we
- * are not utilising the XFS_FEAT_IKEEP inode cluster mode, we can
+ * For version 5 superblocks, if we are initialising a new inode, we
* simply build the new inode core with a random generation number.
*
* For version 4 (and older) superblocks, log recovery is dependent on
@@ -655,8 +654,7 @@ xfs_iget_cache_miss(
* value and hence we must also read the inode off disk even when
* initializing new inodes.
*/
- if (xfs_has_v3inodes(mp) &&
- (flags & XFS_IGET_CREATE) && !xfs_has_ikeep(mp)) {
+ if (xfs_has_v3inodes(mp) && (flags & XFS_IGET_CREATE)) {
VFS_I(ip)->i_generation = get_random_u32();
} else {
struct xfs_buf *bp;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 9c39251961a3..36b39539e561 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -877,6 +877,35 @@ xfs_create_tmpfile(
return error;
}
+static inline int
+xfs_projid_differ(
+ struct xfs_inode *tdp,
+ struct xfs_inode *sip)
+{
+ /*
+ * If we are using project inheritance, we only allow hard link creation
+ * and renames in our tree when the project IDs are the same; else
+ * the tree quota mechanism could be circumvented.
+ */
+ if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
+ tdp->i_projid != sip->i_projid)) {
+ /*
+ * Project quota setup skips special files which can
+ * leave inodes in a PROJINHERIT directory without a
+ * project ID set. We need to allow links to be made
+ * to these "project-less" inodes because userspace
+ * expects them to succeed after project ID setup,
+ * but everything else should be rejected.
+ */
+ if (!special_file(VFS_I(sip)->i_mode) ||
+ sip->i_projid != 0) {
+ return -EXDEV;
+ }
+ }
+
+ return 0;
+}
+
int
xfs_link(
struct xfs_inode *tdp,
@@ -930,27 +959,9 @@ xfs_link(
goto error_return;
}
- /*
- * If we are using project inheritance, we only allow hard link
- * creation in our tree when the project IDs are the same; else
- * the tree quota mechanism could be circumvented.
- */
- if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
- tdp->i_projid != sip->i_projid)) {
- /*
- * Project quota setup skips special files which can
- * leave inodes in a PROJINHERIT directory without a
- * project ID set. We need to allow links to be made
- * to these "project-less" inodes because userspace
- * expects them to succeed after project ID setup,
- * but everything else should be rejected.
- */
- if (!special_file(VFS_I(sip)->i_mode) ||
- sip->i_projid != 0) {
- error = -EXDEV;
- goto error_return;
- }
- }
+ error = xfs_projid_differ(tdp, sip);
+ if (error)
+ goto error_return;
error = xfs_dir_add_child(tp, resblks, &du);
if (error)
@@ -1035,7 +1046,7 @@ xfs_itruncate_extents_flags(
int error = 0;
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
- if (atomic_read(&VFS_I(ip)->i_count))
+ if (icount_read(VFS_I(ip)))
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
ASSERT(new_size <= XFS_ISIZE(ip));
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -1656,7 +1667,6 @@ retry:
spin_lock(&iip->ili_lock);
iip->ili_last_fields = iip->ili_fields;
iip->ili_fields = 0;
- iip->ili_fsync_fields = 0;
spin_unlock(&iip->ili_lock);
ASSERT(iip->ili_last_fields);
@@ -1821,12 +1831,20 @@ static void
xfs_iunpin(
struct xfs_inode *ip)
{
- xfs_assert_ilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED);
+ struct xfs_inode_log_item *iip = ip->i_itemp;
+ xfs_csn_t seq = 0;
trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED);
+
+ spin_lock(&iip->ili_lock);
+ seq = iip->ili_commit_seq;
+ spin_unlock(&iip->ili_lock);
+ if (!seq)
+ return;
/* Give the log a push to start the unpinning I/O */
- xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL);
+ xfs_log_force_seq(ip->i_mount, seq, 0, NULL);
}
@@ -2227,16 +2245,9 @@ retry:
if (du_wip.ip)
xfs_trans_ijoin(tp, du_wip.ip, 0);
- /*
- * If we are using project inheritance, we only allow renames
- * into our tree when the project IDs are the same; else the
- * tree quota mechanism would be circumvented.
- */
- if (unlikely((target_dp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
- target_dp->i_projid != src_ip->i_projid)) {
- error = -EXDEV;
+ error = xfs_projid_differ(target_dp, src_ip);
+ if (error)
goto out_trans_cancel;
- }
/* RENAME_EXCHANGE is unique from here on. */
if (flags & RENAME_EXCHANGE) {
@@ -2377,8 +2388,8 @@ xfs_iflush(
* error handling as the caller will shutdown and fail the buffer.
*/
error = -EFSCORRUPTED;
- if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
- mp, XFS_ERRTAG_IFLUSH_1)) {
+ if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC) ||
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_1)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad inode %llu magic number 0x%x, ptr "PTR_FMT,
__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
@@ -2394,29 +2405,27 @@ xfs_iflush(
goto flush_out;
}
} else if (S_ISREG(VFS_I(ip)->i_mode)) {
- if (XFS_TEST_ERROR(
- ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
- ip->i_df.if_format != XFS_DINODE_FMT_BTREE,
- mp, XFS_ERRTAG_IFLUSH_3)) {
+ if ((ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
+ ip->i_df.if_format != XFS_DINODE_FMT_BTREE) ||
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_3)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad regular inode %llu, ptr "PTR_FMT,
__func__, ip->i_ino, ip);
goto flush_out;
}
} else if (S_ISDIR(VFS_I(ip)->i_mode)) {
- if (XFS_TEST_ERROR(
- ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
- ip->i_df.if_format != XFS_DINODE_FMT_BTREE &&
- ip->i_df.if_format != XFS_DINODE_FMT_LOCAL,
- mp, XFS_ERRTAG_IFLUSH_4)) {
+ if ((ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
+ ip->i_df.if_format != XFS_DINODE_FMT_BTREE &&
+ ip->i_df.if_format != XFS_DINODE_FMT_LOCAL) ||
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_4)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad directory inode %llu, ptr "PTR_FMT,
__func__, ip->i_ino, ip);
goto flush_out;
}
}
- if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af) >
- ip->i_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
+ if (ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af) >
+ ip->i_nblocks || XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_5)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: detected corrupt incore inode %llu, "
"total extents = %llu nblocks = %lld, ptr "PTR_FMT,
@@ -2425,8 +2434,8 @@ xfs_iflush(
ip->i_nblocks, ip);
goto flush_out;
}
- if (XFS_TEST_ERROR(ip->i_forkoff > mp->m_sb.sb_inodesize,
- mp, XFS_ERRTAG_IFLUSH_6)) {
+ if (ip->i_forkoff > mp->m_sb.sb_inodesize ||
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_6)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: bad inode %llu, forkoff 0x%x, ptr "PTR_FMT,
__func__, ip->i_ino, ip->i_forkoff, ip);
@@ -2502,7 +2511,6 @@ flush_out:
spin_lock(&iip->ili_lock);
iip->ili_last_fields = iip->ili_fields;
iip->ili_fields = 0;
- iip->ili_fsync_fields = 0;
set_bit(XFS_LI_FLUSHING, &iip->ili_item.li_flags);
spin_unlock(&iip->ili_lock);
@@ -2661,12 +2669,15 @@ int
xfs_log_force_inode(
struct xfs_inode *ip)
{
+ struct xfs_inode_log_item *iip = ip->i_itemp;
xfs_csn_t seq = 0;
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- if (xfs_ipincount(ip))
- seq = ip->i_itemp->ili_commit_seq;
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ if (!iip)
+ return 0;
+
+ spin_lock(&iip->ili_lock);
+ seq = iip->ili_commit_seq;
+ spin_unlock(&iip->ili_lock);
if (!seq)
return 0;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 829675700fcd..1bd411a1114c 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -131,46 +131,28 @@ xfs_inode_item_precommit(
}
/*
- * Inode verifiers do not check that the extent size hint is an integer
- * multiple of the rt extent size on a directory with both rtinherit
- * and extszinherit flags set. If we're logging a directory that is
- * misconfigured in this way, clear the hint.
+ * Inode verifiers do not check that the extent size hints are an
+ * integer multiple of the rt extent size on a directory with
+ * rtinherit flags set. If we're logging a directory that is
+ * misconfigured in this way, clear the bad hints.
*/
- if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
- (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
- xfs_extlen_to_rtxmod(ip->i_mount, ip->i_extsize) > 0) {
- ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
- XFS_DIFLAG_EXTSZINHERIT);
- ip->i_extsize = 0;
- flags |= XFS_ILOG_CORE;
+ if (ip->i_diflags & XFS_DIFLAG_RTINHERIT) {
+ if ((ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
+ xfs_extlen_to_rtxmod(ip->i_mount, ip->i_extsize) > 0) {
+ ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
+ XFS_DIFLAG_EXTSZINHERIT);
+ ip->i_extsize = 0;
+ flags |= XFS_ILOG_CORE;
+ }
+ if ((ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) &&
+ xfs_extlen_to_rtxmod(ip->i_mount, ip->i_cowextsize) > 0) {
+ ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
+ ip->i_cowextsize = 0;
+ flags |= XFS_ILOG_CORE;
+ }
}
- /*
- * Record the specific change for fdatasync optimisation. This allows
- * fdatasync to skip log forces for inodes that are only timestamp
- * dirty. Once we've processed the XFS_ILOG_IVERSION flag, convert it
- * to XFS_ILOG_CORE so that the actual on-disk dirty tracking
- * (ili_fields) correctly tracks that the version has changed.
- */
spin_lock(&iip->ili_lock);
- iip->ili_fsync_fields |= (flags & ~XFS_ILOG_IVERSION);
- if (flags & XFS_ILOG_IVERSION)
- flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE);
-
- /*
- * Inode verifiers do not check that the CoW extent size hint is an
- * integer multiple of the rt extent size on a directory with both
- * rtinherit and cowextsize flags set. If we're logging a directory
- * that is misconfigured in this way, clear the hint.
- */
- if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
- (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) &&
- xfs_extlen_to_rtxmod(ip->i_mount, ip->i_cowextsize) > 0) {
- ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
- ip->i_cowextsize = 0;
- flags |= XFS_ILOG_CORE;
- }
-
if (!iip->ili_item.li_buf) {
struct xfs_buf *bp;
int error;
@@ -205,6 +187,20 @@ xfs_inode_item_precommit(
}
/*
+ * Store the dirty flags back into the inode item as this state is used
+ * later on in xfs_inode_item_committing() to determine whether the
+ * transaction is relevant to fsync state or not.
+ */
+ iip->ili_dirty_flags = flags;
+
+ /*
+ * Convert the flags to the on-disk fields that were modified in the
+ * transaction so that ili_fields tracks the changes correctly.
+ */
+ if (flags & XFS_ILOG_IVERSION)
+ flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE);
+
+ /*
* Always OR in the bits from the ili_last_fields field. This is to
* coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
* in the eventual clearing of the ili_fields bits. See the big comment
@@ -214,12 +210,6 @@ xfs_inode_item_precommit(
spin_unlock(&iip->ili_lock);
xfs_inode_item_precommit_check(ip);
-
- /*
- * We are done with the log item transaction dirty state, so clear it so
- * that it doesn't pollute future transactions.
- */
- iip->ili_dirty_flags = 0;
return 0;
}
@@ -729,13 +719,24 @@ xfs_inode_item_unpin(
struct xfs_log_item *lip,
int remove)
{
- struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
+ struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+ struct xfs_inode *ip = iip->ili_inode;
trace_xfs_inode_unpin(ip, _RET_IP_);
ASSERT(lip->li_buf || xfs_iflags_test(ip, XFS_ISTALE));
ASSERT(atomic_read(&ip->i_pincount) > 0);
- if (atomic_dec_and_test(&ip->i_pincount))
+
+ /*
+ * If this is the last unpin, then the inode no longer needs a journal
+ * flush to persist it. Hence we can clear the commit sequence numbers
+ * as a fsync/fdatasync operation on the inode at this point is a no-op.
+ */
+ if (atomic_dec_and_lock(&ip->i_pincount, &iip->ili_lock)) {
+ iip->ili_commit_seq = 0;
+ iip->ili_datasync_seq = 0;
+ spin_unlock(&iip->ili_lock);
wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
+ }
}
STATIC uint
@@ -858,12 +859,45 @@ xfs_inode_item_committed(
return lsn;
}
+/*
+ * The modification is now complete, so before we unlock the inode we need to
+ * update the commit sequence numbers for data integrity journal flushes. We
+ * always record the commit sequence number (ili_commit_seq) so that anything
+ * that needs a full journal sync will capture all of this modification.
+ *
+ * We then
+ * check if the changes will impact a datasync (O_DSYNC) journal flush. If the
+ * changes will require a datasync flush, then we also record the sequence in
+ * ili_datasync_seq.
+ *
+ * These commit sequence numbers will get cleared atomically with the inode being
+ * unpinned (i.e. pin count goes to zero), and so they will only be set when the
+ * inode is dirty in the journal. This removes the need for checking if the
+ * inode is pinned to determine if a journal flush is necessary, and hence
+ * removes the need for holding the ILOCK_SHARED in xfs_file_fsync() to
+ * serialise pin counts against commit sequence number updates.
+ *
+ */
STATIC void
xfs_inode_item_committing(
struct xfs_log_item *lip,
xfs_csn_t seq)
{
- INODE_ITEM(lip)->ili_commit_seq = seq;
+ struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+
+ spin_lock(&iip->ili_lock);
+ iip->ili_commit_seq = seq;
+ if (iip->ili_dirty_flags & ~(XFS_ILOG_IVERSION | XFS_ILOG_TIMESTAMP))
+ iip->ili_datasync_seq = seq;
+ spin_unlock(&iip->ili_lock);
+
+ /*
+ * Clear the per-transaction dirty flags now that we have finished
+ * recording the transaction's inode modifications in the CIL and are
+ * about to release and (maybe) unlock the inode.
+ */
+ iip->ili_dirty_flags = 0;
+
return xfs_inode_item_release(lip);
}
@@ -1055,7 +1089,6 @@ xfs_iflush_abort_clean(
{
iip->ili_last_fields = 0;
iip->ili_fields = 0;
- iip->ili_fsync_fields = 0;
iip->ili_flush_lsn = 0;
iip->ili_item.li_buf = NULL;
list_del_init(&iip->ili_item.li_bio_list);
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index ba92ce11a011..2ddcca41714f 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -32,9 +32,17 @@ struct xfs_inode_log_item {
spinlock_t ili_lock; /* flush state lock */
unsigned int ili_last_fields; /* fields when flushed */
unsigned int ili_fields; /* fields to be logged */
- unsigned int ili_fsync_fields; /* logged since last fsync */
xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
+
+ /*
+ * We record the sequence number for every inode modification, as
+ * well as those that only require fdatasync operations for data
+ * integrity. This allows optimisation of the O_DSYNC/fdatasync path
+ * without needing to track what modifications the journal is currently
+ * carrying for the inode. These are protected by the above ili_lock.
+ */
xfs_csn_t ili_commit_seq; /* last transaction commit */
+ xfs_csn_t ili_datasync_seq; /* for datasync optimisation */
};
static inline int xfs_inode_clean(struct xfs_inode *ip)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index e1051a530a50..a6bb7ee7a27a 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -512,9 +512,6 @@ xfs_fileattr_get(
{
struct xfs_inode *ip = XFS_I(d_inode(dentry));
- if (d_is_special(dentry))
- return -ENOTTY;
-
xfs_ilock(ip, XFS_ILOCK_SHARED);
xfs_fill_fsxattr(ip, XFS_DATA_FORK, fa);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -736,9 +733,6 @@ xfs_fileattr_set(
trace_xfs_ioctl_setattr(ip);
- if (d_is_special(dentry))
- return -ENOTTY;
-
if (!fa->fsx_valid) {
if (fa->flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL |
FS_NOATIME_FL | FS_NODUMP_FL |
@@ -1209,21 +1203,21 @@ xfs_file_ioctl(
current->comm);
return -ENOTTY;
case XFS_IOC_DIOINFO: {
- struct xfs_buftarg *target = xfs_inode_buftarg(ip);
+ struct kstat st;
struct dioattr da;
- da.d_mem = target->bt_logical_sectorsize;
+ error = vfs_getattr(&filp->f_path, &st, STATX_DIOALIGN, 0);
+ if (error)
+ return error;
/*
- * See xfs_report_dioalign() for an explanation about why this
- * reports a value larger than the sector size for COW inodes.
+ * Some userspace directly feeds the return value to
+ * posix_memalign, which fails for values that are smaller than
+ * the pointer size. Round up the value to not break userspace.
*/
- if (xfs_is_cow_inode(ip))
- da.d_miniosz = xfs_inode_alloc_unitsize(ip);
- else
- da.d_miniosz = target->bt_logical_sectorsize;
+ da.d_mem = roundup(st.dio_mem_align, sizeof(void *));
+ da.d_miniosz = st.dio_offset_align;
da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
-
if (copy_to_user(arg, &da, sizeof(da)))
return -EFAULT;
return 0;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2a74f2957341..d3f6e3e42a11 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -149,9 +149,18 @@ xfs_bmbt_to_iomap(
iomap->bdev = target->bt_bdev;
iomap->flags = iomap_flags;
- if (xfs_ipincount(ip) &&
- (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
- iomap->flags |= IOMAP_F_DIRTY;
+ /*
+ * If the inode is dirty for datasync purposes, let iomap know so it
+ * doesn't elide the IO completion journal flushes on O_DSYNC IO.
+ */
+ if (ip->i_itemp) {
+ struct xfs_inode_log_item *iip = ip->i_itemp;
+
+ spin_lock(&iip->ili_lock);
+ if (iip->ili_datasync_seq)
+ iomap->flags |= IOMAP_F_DIRTY;
+ spin_unlock(&iip->ili_lock);
+ }
iomap->validity_cookie = sequence_cookie;
return 0;
@@ -1554,7 +1563,7 @@ xfs_zoned_buffered_write_iomap_begin(
return error;
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) ||
- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
xfs_bmap_mark_sick(ip, XFS_DATA_FORK);
error = -EFSCORRUPTED;
goto out_unlock;
@@ -1728,7 +1737,7 @@ xfs_buffered_write_iomap_begin(
return error;
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) ||
- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
xfs_bmap_mark_sick(ip, XFS_DATA_FORK);
error = -EFSCORRUPTED;
goto out_unlock;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 603effabe1ee..caff0125faea 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -431,14 +431,12 @@ xfs_vn_symlink(
struct dentry *dentry,
const char *symname)
{
- struct inode *inode;
- struct xfs_inode *cip = NULL;
- struct xfs_name name;
- int error;
- umode_t mode;
+ struct inode *inode;
+ struct xfs_inode *cip = NULL;
+ struct xfs_name name;
+ int error;
+ umode_t mode = S_IFLNK | S_IRWXUGO;
- mode = S_IFLNK |
- (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
error = xfs_dentry_mode_to_name(&name, dentry, mode);
if (unlikely(error))
goto out;
@@ -1335,6 +1333,8 @@ static const struct inode_operations xfs_symlink_inode_operations = {
.setattr = xfs_vn_setattr,
.listxattr = xfs_vn_listxattr,
.update_time = xfs_vn_update_time,
+ .fileattr_get = xfs_fileattr_get,
+ .fileattr_set = xfs_fileattr_set,
};
/* Figure out if this file actually supports DAX. */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 9a2221b4aa21..4dd747bdbcca 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -89,8 +89,6 @@ typedef __u32 xfs_nlink_t;
#undef XFS_NATIVE_HOST
#endif
-#define irix_sgid_inherit xfs_params.sgid_inherit.val
-#define irix_symlink_mode xfs_params.symlink_mode.val
#define xfs_panic_mask xfs_params.panic_mask.val
#define xfs_error_level xfs_params.error_level.val
#define xfs_syncd_centisecs xfs_params.syncd_timer.val
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c8a57e21a1d3..603e85c1ab4c 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -969,8 +969,8 @@ xfs_log_unmount_write(
* counters will be recalculated. Refer to xlog_check_unmount_rec for
* more details.
*/
- if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
- XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
+ if (xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS) ||
+ XFS_TEST_ERROR(mp, XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
xfs_alert(mp, "%s: will fix summary counters at next mount",
__func__);
return;
@@ -1240,7 +1240,7 @@ xlog_ioend_work(
/*
* Race to shutdown the filesystem if we see an error.
*/
- if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
+ if (error || XFS_TEST_ERROR(log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
xfs_alert(log->l_mp, "log I/O error %d", error);
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
}
@@ -1489,8 +1489,7 @@ xlog_alloc_log(
log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */
log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
- XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM |
- WQ_HIGHPRI),
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_PERCPU),
0, mp->m_super->s_id);
if (!log->l_ioend_workqueue)
goto out_free_iclog;
@@ -1568,13 +1567,13 @@ xlog_cksum(
struct xlog *log,
struct xlog_rec_header *rhead,
char *dp,
- int size)
+ unsigned int hdrsize,
+ unsigned int size)
{
uint32_t crc;
/* first generate the crc for the record header ... */
- crc = xfs_start_cksum_update((char *)rhead,
- sizeof(struct xlog_rec_header),
+ crc = xfs_start_cksum_update((char *)rhead, hdrsize,
offsetof(struct xlog_rec_header, h_crc));
/* ... then for additional cycle data for v2 logs ... */
@@ -1818,7 +1817,7 @@ xlog_sync(
/* calculcate the checksum */
iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
- iclog->ic_datap, size);
+ iclog->ic_datap, XLOG_REC_SIZE, size);
/*
* Intentionally corrupt the log record CRC based on the error injection
* frequency, if defined. This facilitates testing log recovery in the
@@ -1827,7 +1826,7 @@ xlog_sync(
* detects the bad CRC and attempts to recover.
*/
#ifdef DEBUG
- if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
+ if (XFS_TEST_ERROR(log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
iclog->ic_fail_crc = true;
xfs_warn(log->l_mp,
@@ -2656,10 +2655,11 @@ restart:
* until you know exactly how many bytes get copied. Therefore, wait
* until later to update ic_offset.
*
- * xlog_write() algorithm assumes that at least 2 xlog_op_header_t's
+ * xlog_write() algorithm assumes that at least 2 xlog_op_header's
* can fit into remaining data section.
*/
- if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) {
+ if (iclog->ic_size - iclog->ic_offset <
+ 2 * sizeof(struct xlog_op_header)) {
int error = 0;
xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
@@ -3153,11 +3153,11 @@ xlog_calc_unit_res(
*/
/* for trans header */
- unit_bytes += sizeof(xlog_op_header_t);
- unit_bytes += sizeof(xfs_trans_header_t);
+ unit_bytes += sizeof(struct xlog_op_header);
+ unit_bytes += sizeof(struct xfs_trans_header);
/* for start-rec */
- unit_bytes += sizeof(xlog_op_header_t);
+ unit_bytes += sizeof(struct xlog_op_header);
/*
* for LR headers - the space for data in an iclog is the size minus
@@ -3180,12 +3180,12 @@ xlog_calc_unit_res(
num_headers = howmany(unit_bytes, iclog_space);
/* for split-recs - ophdrs added when data split over LRs */
- unit_bytes += sizeof(xlog_op_header_t) * num_headers;
+ unit_bytes += sizeof(struct xlog_op_header) * num_headers;
/* add extra header reservations if we overrun */
while (!num_headers ||
howmany(unit_bytes, iclog_space) > num_headers) {
- unit_bytes += sizeof(xlog_op_header_t);
+ unit_bytes += sizeof(struct xlog_op_header);
num_headers++;
}
unit_bytes += log->l_iclog_hsize * num_headers;
@@ -3322,7 +3322,7 @@ xlog_verify_iclog(
struct xlog_in_core *iclog,
int count)
{
- xlog_op_header_t *ophead;
+ struct xlog_op_header *ophead;
xlog_in_core_t *icptr;
xlog_in_core_2_t *xhdr;
void *base_ptr, *ptr, *p;
@@ -3400,7 +3400,7 @@ xlog_verify_iclog(
op_len = be32_to_cpu(iclog->ic_header.h_cycle_data[idx]);
}
}
- ptr += sizeof(xlog_op_header_t) + op_len;
+ ptr += sizeof(struct xlog_op_header) + op_len;
}
}
#endif
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index af6daf4f6792..dcc1f44ed68f 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -20,6 +20,43 @@ struct xfs_log_vec {
int lv_alloc_size; /* size of allocated lv */
};
+/* Region types for iovec's i_type */
+#define XLOG_REG_TYPE_BFORMAT 1
+#define XLOG_REG_TYPE_BCHUNK 2
+#define XLOG_REG_TYPE_EFI_FORMAT 3
+#define XLOG_REG_TYPE_EFD_FORMAT 4
+#define XLOG_REG_TYPE_IFORMAT 5
+#define XLOG_REG_TYPE_ICORE 6
+#define XLOG_REG_TYPE_IEXT 7
+#define XLOG_REG_TYPE_IBROOT 8
+#define XLOG_REG_TYPE_ILOCAL 9
+#define XLOG_REG_TYPE_IATTR_EXT 10
+#define XLOG_REG_TYPE_IATTR_BROOT 11
+#define XLOG_REG_TYPE_IATTR_LOCAL 12
+#define XLOG_REG_TYPE_QFORMAT 13
+#define XLOG_REG_TYPE_DQUOT 14
+#define XLOG_REG_TYPE_QUOTAOFF 15
+#define XLOG_REG_TYPE_LRHEADER 16
+#define XLOG_REG_TYPE_UNMOUNT 17
+#define XLOG_REG_TYPE_COMMIT 18
+#define XLOG_REG_TYPE_TRANSHDR 19
+#define XLOG_REG_TYPE_ICREATE 20
+#define XLOG_REG_TYPE_RUI_FORMAT 21
+#define XLOG_REG_TYPE_RUD_FORMAT 22
+#define XLOG_REG_TYPE_CUI_FORMAT 23
+#define XLOG_REG_TYPE_CUD_FORMAT 24
+#define XLOG_REG_TYPE_BUI_FORMAT 25
+#define XLOG_REG_TYPE_BUD_FORMAT 26
+#define XLOG_REG_TYPE_ATTRI_FORMAT 27
+#define XLOG_REG_TYPE_ATTRD_FORMAT 28
+#define XLOG_REG_TYPE_ATTR_NAME 29
+#define XLOG_REG_TYPE_ATTR_VALUE 30
+#define XLOG_REG_TYPE_XMI_FORMAT 31
+#define XLOG_REG_TYPE_XMD_FORMAT 32
+#define XLOG_REG_TYPE_ATTR_NEWNAME 33
+#define XLOG_REG_TYPE_ATTR_NEWVALUE 34
+#define XLOG_REG_TYPE_MAX 34
+
#define XFS_LOG_VEC_ORDERED (-1)
/*
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index a9a7a271c15b..0cfc654d8e87 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -499,8 +499,8 @@ xlog_recover_finish(
extern void
xlog_recover_cancel(struct xlog *);
-extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
- char *dp, int size);
+__le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
+ char *dp, unsigned int hdrsize, unsigned int size);
extern struct kmem_cache *xfs_log_ticket_cache;
struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index e6ed9e09c027..549d60959aee 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2894,20 +2894,34 @@ xlog_recover_process(
int pass,
struct list_head *buffer_list)
{
- __le32 old_crc = rhead->h_crc;
- __le32 crc;
+ __le32 expected_crc = rhead->h_crc, crc, other_crc;
- crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
+ crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE,
+ be32_to_cpu(rhead->h_len));
+
+ /*
+ * Look at the end of the struct xlog_rec_header definition in
+ * xfs_log_format.h for the gory details.
+ */
+ if (expected_crc && crc != expected_crc) {
+ other_crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE_OTHER,
+ be32_to_cpu(rhead->h_len));
+ if (other_crc == expected_crc) {
+ xfs_notice_once(log->l_mp,
+ "Fixing up incorrect CRC due to padding.");
+ crc = other_crc;
+ }
+ }
/*
* Nothing else to do if this is a CRC verification pass. Just return
* if this a record with a non-zero crc. Unfortunately, mkfs always
- * sets old_crc to 0 so we must consider this valid even on v5 supers.
- * Otherwise, return EFSBADCRC on failure so the callers up the stack
- * know precisely what failed.
+ * sets expected_crc to 0 so we must consider this valid even on v5
+ * supers. Otherwise, return EFSBADCRC on failure so the callers up the
+ * stack know precisely what failed.
*/
if (pass == XLOG_RECOVER_CRCPASS) {
- if (old_crc && crc != old_crc)
+ if (expected_crc && crc != expected_crc)
return -EFSBADCRC;
return 0;
}
@@ -2918,11 +2932,11 @@ xlog_recover_process(
* zero CRC check prevents warnings from being emitted when upgrading
* the kernel from one that does not add CRCs by default.
*/
- if (crc != old_crc) {
- if (old_crc || xfs_has_crc(log->l_mp)) {
+ if (crc != expected_crc) {
+ if (expected_crc || xfs_has_crc(log->l_mp)) {
xfs_alert(log->l_mp,
"log record CRC mismatch: found 0x%x, expected 0x%x.",
- le32_to_cpu(old_crc),
+ le32_to_cpu(expected_crc),
le32_to_cpu(crc));
xfs_hex_dump(dp, 32);
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index dc32c5e34d81..0953f6ae94ab 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1057,19 +1057,6 @@ xfs_mountfs(
xfs_inodegc_start(mp);
xfs_blockgc_start(mp);
- /*
- * Now that we've recovered any pending superblock feature bit
- * additions, we can finish setting up the attr2 behaviour for the
- * mount. The noattr2 option overrides the superblock flag, so only
- * check the superblock feature flag if the mount option is not set.
- */
- if (xfs_has_noattr2(mp)) {
- mp->m_features &= ~XFS_FEAT_ATTR2;
- } else if (!xfs_has_attr2(mp) &&
- (mp->m_sb.sb_features2 & XFS_SB_VERSION2_ATTR2BIT)) {
- mp->m_features |= XFS_FEAT_ATTR2;
- }
-
if (xfs_has_metadir(mp)) {
error = xfs_mount_setup_metadir(mp);
if (error)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 97de44c32272..f046d1215b04 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -363,7 +363,6 @@ typedef struct xfs_mount {
#define XFS_FEAT_EXTFLG (1ULL << 7) /* unwritten extents */
#define XFS_FEAT_ASCIICI (1ULL << 8) /* ASCII only case-insens. */
#define XFS_FEAT_LAZYSBCOUNT (1ULL << 9) /* Superblk counters */
-#define XFS_FEAT_ATTR2 (1ULL << 10) /* dynamic attr fork */
#define XFS_FEAT_PARENT (1ULL << 11) /* parent pointers */
#define XFS_FEAT_PROJID32 (1ULL << 12) /* 32 bit project id */
#define XFS_FEAT_CRC (1ULL << 13) /* metadata CRCs */
@@ -386,7 +385,6 @@ typedef struct xfs_mount {
/* Mount features */
#define XFS_FEAT_NOLIFETIME (1ULL << 47) /* disable lifetime hints */
-#define XFS_FEAT_NOATTR2 (1ULL << 48) /* disable attr2 creation */
#define XFS_FEAT_NOALIGN (1ULL << 49) /* ignore alignment */
#define XFS_FEAT_ALLOCSIZE (1ULL << 50) /* user specified allocation size */
#define XFS_FEAT_LARGE_IOSIZE (1ULL << 51) /* report large preferred
@@ -396,7 +394,6 @@ typedef struct xfs_mount {
#define XFS_FEAT_DISCARD (1ULL << 54) /* discard unused blocks */
#define XFS_FEAT_GRPID (1ULL << 55) /* group-ID assigned from directory */
#define XFS_FEAT_SMALL_INUMS (1ULL << 56) /* user wants 32bit inodes */
-#define XFS_FEAT_IKEEP (1ULL << 57) /* keep empty inode clusters*/
#define XFS_FEAT_SWALLOC (1ULL << 58) /* stripe width allocation */
#define XFS_FEAT_FILESTREAMS (1ULL << 59) /* use filestreams allocator */
#define XFS_FEAT_DAX_ALWAYS (1ULL << 60) /* DAX always enabled */
@@ -504,12 +501,17 @@ __XFS_HAS_V4_FEAT(align, ALIGN)
__XFS_HAS_V4_FEAT(logv2, LOGV2)
__XFS_HAS_V4_FEAT(extflg, EXTFLG)
__XFS_HAS_V4_FEAT(lazysbcount, LAZYSBCOUNT)
-__XFS_ADD_V4_FEAT(attr2, ATTR2)
__XFS_ADD_V4_FEAT(projid32, PROJID32)
__XFS_HAS_V4_FEAT(v3inodes, V3INODES)
__XFS_HAS_V4_FEAT(crc, CRC)
__XFS_HAS_V4_FEAT(pquotino, PQUOTINO)
+static inline void xfs_add_attr2(struct xfs_mount *mp)
+{
+ if (IS_ENABLED(CONFIG_XFS_SUPPORT_V4))
+ xfs_sb_version_addattr2(&mp->m_sb);
+}
+
/*
* Mount features
*
@@ -517,7 +519,6 @@ __XFS_HAS_V4_FEAT(pquotino, PQUOTINO)
* bit inodes and read-only state, are kept as operational state rather than
* features.
*/
-__XFS_HAS_FEAT(noattr2, NOATTR2)
__XFS_HAS_FEAT(noalign, NOALIGN)
__XFS_HAS_FEAT(allocsize, ALLOCSIZE)
__XFS_HAS_FEAT(large_iosize, LARGE_IOSIZE)
@@ -526,7 +527,6 @@ __XFS_HAS_FEAT(dirsync, DIRSYNC)
__XFS_HAS_FEAT(discard, DISCARD)
__XFS_HAS_FEAT(grpid, GRPID)
__XFS_HAS_FEAT(small_inums, SMALL_INUMS)
-__XFS_HAS_FEAT(ikeep, IKEEP)
__XFS_HAS_FEAT(swalloc, SWALLOC)
__XFS_HAS_FEAT(filestreams, FILESTREAMS)
__XFS_HAS_FEAT(dax_always, DAX_ALWAYS)
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 866c71d9fbae..73b7e72944e4 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -293,7 +293,8 @@ int
xfs_mru_cache_init(void)
{
xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache",
- XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 1);
+ XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU),
+ 1);
if (!xfs_mru_reap_wq)
return -ENOMEM;
return 0;
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index fbeddcac4792..b17672889942 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -165,7 +165,7 @@ xfs_dax_translate_range(
uint64_t *bblen)
{
u64 dev_start = btp->bt_dax_part_off;
- u64 dev_len = bdev_nr_bytes(btp->bt_bdev);
+ u64 dev_len = BBTOB(btp->bt_nr_sectors);
u64 dev_end = dev_start + dev_len - 1;
/* Notify failure on the whole device. */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index bb0a82635a77..e85a156dc17d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -105,8 +105,8 @@ enum {
Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
- Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
- Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
+ Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32,
+ Opt_largeio, Opt_nolargeio,
Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
@@ -133,12 +133,8 @@ static const struct fs_parameter_spec xfs_fs_parameters[] = {
fsparam_flag("norecovery", Opt_norecovery),
fsparam_flag("inode64", Opt_inode64),
fsparam_flag("inode32", Opt_inode32),
- fsparam_flag("ikeep", Opt_ikeep),
- fsparam_flag("noikeep", Opt_noikeep),
fsparam_flag("largeio", Opt_largeio),
fsparam_flag("nolargeio", Opt_nolargeio),
- fsparam_flag("attr2", Opt_attr2),
- fsparam_flag("noattr2", Opt_noattr2),
fsparam_flag("filestreams", Opt_filestreams),
fsparam_flag("quota", Opt_quota),
fsparam_flag("noquota", Opt_noquota),
@@ -175,13 +171,11 @@ xfs_fs_show_options(
{
static struct proc_xfs_info xfs_info_set[] = {
/* the few simple ones we can get from the mount struct */
- { XFS_FEAT_IKEEP, ",ikeep" },
{ XFS_FEAT_WSYNC, ",wsync" },
{ XFS_FEAT_NOALIGN, ",noalign" },
{ XFS_FEAT_SWALLOC, ",swalloc" },
{ XFS_FEAT_NOUUID, ",nouuid" },
{ XFS_FEAT_NORECOVERY, ",norecovery" },
- { XFS_FEAT_ATTR2, ",attr2" },
{ XFS_FEAT_FILESTREAMS, ",filestreams" },
{ XFS_FEAT_GRPID, ",grpid" },
{ XFS_FEAT_DISCARD, ",discard" },
@@ -541,7 +535,8 @@ xfs_setup_devices(
{
int error;
- error = xfs_configure_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
+ error = xfs_configure_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize,
+ mp->m_sb.sb_dblocks);
if (error)
return error;
@@ -551,7 +546,7 @@ xfs_setup_devices(
if (xfs_has_sector(mp))
log_sector_size = mp->m_sb.sb_logsectsize;
error = xfs_configure_buftarg(mp->m_logdev_targp,
- log_sector_size);
+ log_sector_size, mp->m_sb.sb_logblocks);
if (error)
return error;
}
@@ -565,7 +560,7 @@ xfs_setup_devices(
mp->m_rtdev_targp = mp->m_ddev_targp;
} else if (mp->m_rtname) {
error = xfs_configure_buftarg(mp->m_rtdev_targp,
- mp->m_sb.sb_sectsize);
+ mp->m_sb.sb_sectsize, mp->m_sb.sb_rblocks);
if (error)
return error;
}
@@ -578,19 +573,19 @@ xfs_init_mount_workqueues(
struct xfs_mount *mp)
{
mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
- XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
1, mp->m_super->s_id);
if (!mp->m_buf_workqueue)
goto out;
mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
- XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
0, mp->m_super->s_id);
if (!mp->m_unwritten_workqueue)
goto out_destroy_buf;
mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
- XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
0, mp->m_super->s_id);
if (!mp->m_reclaim_workqueue)
goto out_destroy_unwritten;
@@ -602,13 +597,14 @@ xfs_init_mount_workqueues(
goto out_destroy_reclaim;
mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
- XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
1, mp->m_super->s_id);
if (!mp->m_inodegc_wq)
goto out_destroy_blockgc;
mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
- XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_PERCPU), 0,
+ mp->m_super->s_id);
if (!mp->m_sync_workqueue)
goto out_destroy_inodegc;
@@ -778,7 +774,7 @@ xfs_fs_drop_inode(
return 0;
}
- return generic_drop_inode(inode);
+ return inode_generic_drop(inode);
}
STATIC void
@@ -1088,15 +1084,6 @@ xfs_finish_flags(
}
/*
- * V5 filesystems always use attr2 format for attributes.
- */
- if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
- xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
- "attr2 is always enabled for V5 filesystems.");
- return -EINVAL;
- }
-
- /*
* prohibit r/w mounts of read-only filesystems
*/
if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
@@ -1542,22 +1529,6 @@ xfs_fs_parse_param(
return 0;
#endif
/* Following mount options will be removed in September 2025 */
- case Opt_ikeep:
- xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
- parsing_mp->m_features |= XFS_FEAT_IKEEP;
- return 0;
- case Opt_noikeep:
- xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
- parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
- return 0;
- case Opt_attr2:
- xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
- parsing_mp->m_features |= XFS_FEAT_ATTR2;
- return 0;
- case Opt_noattr2:
- xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
- parsing_mp->m_features |= XFS_FEAT_NOATTR2;
- return 0;
case Opt_max_open_zones:
parsing_mp->m_max_open_zones = result.uint_32;
return 0;
@@ -1593,16 +1564,6 @@ xfs_fs_validate_params(
return -EINVAL;
}
- /*
- * We have not read the superblock at this point, so only the attr2
- * mount option can set the attr2 feature by this stage.
- */
- if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
- xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
- return -EINVAL;
- }
-
-
if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
xfs_warn(mp,
"sunit and swidth options incompatible with the noalign option");
@@ -2177,21 +2138,6 @@ xfs_fs_reconfigure(
if (error)
return error;
- /* attr2 -> noattr2 */
- if (xfs_has_noattr2(new_mp)) {
- if (xfs_has_crc(mp)) {
- xfs_warn(mp,
- "attr2 is always enabled for a V5 filesystem - can't be changed.");
- return -EINVAL;
- }
- mp->m_features &= ~XFS_FEAT_ATTR2;
- mp->m_features |= XFS_FEAT_NOATTR2;
- } else if (xfs_has_attr2(new_mp)) {
- /* noattr2 -> attr2 */
- mp->m_features &= ~XFS_FEAT_NOATTR2;
- mp->m_features |= XFS_FEAT_ATTR2;
- }
-
/* Validate new max_atomic_write option before making other changes */
if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) {
error = xfs_set_max_atomic_write_opt(mp,
@@ -2596,8 +2542,8 @@ xfs_init_workqueues(void)
* AGs in all the filesystems mounted. Hence use the default large
* max_active value for this workqueue.
*/
- xfs_alloc_wq = alloc_workqueue("xfsalloc",
- XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
+ xfs_alloc_wq = alloc_workqueue("xfsalloc", XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU),
+ 0);
if (!xfs_alloc_wq)
return -ENOMEM;
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index 751dc74a3067..9918f14b4874 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -50,7 +50,7 @@ xfs_panic_mask_proc_handler(
}
#endif /* CONFIG_PROC_FS */
-STATIC int
+static inline int
xfs_deprecated_dointvec_minmax(
const struct ctl_table *ctl,
int write,
@@ -68,24 +68,6 @@ xfs_deprecated_dointvec_minmax(
static const struct ctl_table xfs_table[] = {
{
- .procname = "irix_sgid_inherit",
- .data = &xfs_params.sgid_inherit.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = xfs_deprecated_dointvec_minmax,
- .extra1 = &xfs_params.sgid_inherit.min,
- .extra2 = &xfs_params.sgid_inherit.max
- },
- {
- .procname = "irix_symlink_mode",
- .data = &xfs_params.symlink_mode.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = xfs_deprecated_dointvec_minmax,
- .extra1 = &xfs_params.symlink_mode.min,
- .extra2 = &xfs_params.symlink_mode.max
- },
- {
.procname = "panic_mask",
.data = &xfs_params.panic_mask.val,
.maxlen = sizeof(int),
@@ -185,15 +167,6 @@ static const struct ctl_table xfs_table[] = {
.extra1 = &xfs_params.blockgc_timer.min,
.extra2 = &xfs_params.blockgc_timer.max,
},
- {
- .procname = "speculative_cow_prealloc_lifetime",
- .data = &xfs_params.blockgc_timer.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = xfs_deprecated_dointvec_minmax,
- .extra1 = &xfs_params.blockgc_timer.min,
- .extra2 = &xfs_params.blockgc_timer.max,
- },
/* please keep this the last entry */
#ifdef CONFIG_PROC_FS
{
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index 51646f066c4f..ed9d896079c1 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -19,9 +19,6 @@ typedef struct xfs_sysctl_val {
} xfs_sysctl_val_t;
typedef struct xfs_param {
- xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is
- * not a member of parent dir GID. */
- xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */
xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */
xfs_sysctl_val_t error_level; /* Degree of reporting for problems */
xfs_sysctl_val_t syncd_timer; /* Interval between xfssyncd wakeups */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ac344e42846c..79b8641880ab 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1152,7 +1152,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
- __entry->count = atomic_read(&VFS_I(ip)->i_count);
+ __entry->count = icount_read(VFS_I(ip));
__entry->pincount = atomic_read(&ip->i_pincount);
__entry->iflags = ip->i_flags;
__entry->caller_ip = caller_ip;
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 575e7028f423..474f5a04ec63 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -452,19 +452,17 @@ xfs_trans_mod_sb(
*/
STATIC void
xfs_trans_apply_sb_deltas(
- xfs_trans_t *tp)
+ struct xfs_trans *tp)
{
- struct xfs_dsb *sbp;
- struct xfs_buf *bp;
- int whole = 0;
-
- bp = xfs_trans_getsb(tp);
- sbp = bp->b_addr;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_buf *bp = xfs_trans_getsb(tp);
+ struct xfs_dsb *sbp = bp->b_addr;
+ int whole = 0;
/*
* Only update the superblock counters if we are logging them
*/
- if (!xfs_has_lazysbcount((tp->t_mountp))) {
+ if (!xfs_has_lazysbcount(mp)) {
if (tp->t_icount_delta)
be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta);
if (tp->t_ifree_delta)
@@ -491,8 +489,7 @@ xfs_trans_apply_sb_deltas(
* write the correct value ondisk.
*/
if ((tp->t_frextents_delta || tp->t_res_frextents_delta) &&
- !xfs_has_rtgroups(tp->t_mountp)) {
- struct xfs_mount *mp = tp->t_mountp;
+ !xfs_has_rtgroups(mp)) {
int64_t rtxdelta;
rtxdelta = tp->t_frextents_delta + tp->t_res_frextents_delta;
@@ -505,6 +502,8 @@ xfs_trans_apply_sb_deltas(
if (tp->t_dblocks_delta) {
be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta);
+ mp->m_ddev_targp->bt_nr_sectors +=
+ XFS_FSB_TO_BB(mp, tp->t_dblocks_delta);
whole = 1;
}
if (tp->t_agcount_delta) {
@@ -524,7 +523,7 @@ xfs_trans_apply_sb_deltas(
* recompute the ondisk rtgroup block log. The incore values
* will be recomputed in xfs_trans_unreserve_and_mod_sb.
*/
- if (xfs_has_rtgroups(tp->t_mountp)) {
+ if (xfs_has_rtgroups(mp)) {
sbp->sb_rgblklog = xfs_compute_rgblklog(
be32_to_cpu(sbp->sb_rgextents),
be32_to_cpu(sbp->sb_rextsize));
@@ -537,6 +536,8 @@ xfs_trans_apply_sb_deltas(
}
if (tp->t_rblocks_delta) {
be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta);
+ mp->m_rtdev_targp->bt_nr_sectors +=
+ XFS_FSB_TO_BB(mp, tp->t_rblocks_delta);
whole = 1;
}
if (tp->t_rextents_delta) {
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 67c328d23e4a..38983c6777df 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -374,7 +374,7 @@ xfsaild_push_item(
* If log item pinning is enabled, skip the push and track the item as
* pinned. This can help induce head-behind-tail conditions.
*/
- if (XFS_TEST_ERROR(false, ailp->ail_log->l_mp, XFS_ERRTAG_LOG_ITEM_PIN))
+ if (XFS_TEST_ERROR(ailp->ail_log->l_mp, XFS_ERRTAG_LOG_ITEM_PIN))
return XFS_ITEM_PINNED;
/*
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index f28214c28ab5..1147bacb2da8 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -493,64 +493,58 @@ xfs_try_open_zone(
return oz;
}
+enum xfs_zone_alloc_score {
+ /* Any open zone will do it, we're desperate */
+ XFS_ZONE_ALLOC_ANY = 0,
+
+ /* It better fit somehow */
+ XFS_ZONE_ALLOC_OK = 1,
+
+ /* Only reuse a zone if it fits really well. */
+ XFS_ZONE_ALLOC_GOOD = 2,
+};
+
/*
- * For data with short or medium lifetime, try to colocated it into an
- * already open zone with a matching temperature.
+ * Life time hint co-location matrix. Fields not set default to 0
+ * aka XFS_ZONE_ALLOC_ANY.
*/
-static bool
-xfs_colocate_eagerly(
- enum rw_hint file_hint)
-{
- switch (file_hint) {
- case WRITE_LIFE_MEDIUM:
- case WRITE_LIFE_SHORT:
- case WRITE_LIFE_NONE:
- return true;
- default:
- return false;
- }
-}
-
-static bool
-xfs_good_hint_match(
- struct xfs_open_zone *oz,
- enum rw_hint file_hint)
-{
- switch (oz->oz_write_hint) {
- case WRITE_LIFE_LONG:
- case WRITE_LIFE_EXTREME:
- /* colocate long and extreme */
- if (file_hint == WRITE_LIFE_LONG ||
- file_hint == WRITE_LIFE_EXTREME)
- return true;
- break;
- case WRITE_LIFE_MEDIUM:
- /* colocate medium with medium */
- if (file_hint == WRITE_LIFE_MEDIUM)
- return true;
- break;
- case WRITE_LIFE_SHORT:
- case WRITE_LIFE_NONE:
- case WRITE_LIFE_NOT_SET:
- /* colocate short and none */
- if (file_hint <= WRITE_LIFE_SHORT)
- return true;
- break;
- }
- return false;
-}
+static const unsigned int
+xfs_zoned_hint_score[WRITE_LIFE_HINT_NR][WRITE_LIFE_HINT_NR] = {
+ [WRITE_LIFE_NOT_SET] = {
+ [WRITE_LIFE_NOT_SET] = XFS_ZONE_ALLOC_OK,
+ },
+ [WRITE_LIFE_NONE] = {
+ [WRITE_LIFE_NONE] = XFS_ZONE_ALLOC_OK,
+ },
+ [WRITE_LIFE_SHORT] = {
+ [WRITE_LIFE_SHORT] = XFS_ZONE_ALLOC_GOOD,
+ },
+ [WRITE_LIFE_MEDIUM] = {
+ [WRITE_LIFE_MEDIUM] = XFS_ZONE_ALLOC_GOOD,
+ },
+ [WRITE_LIFE_LONG] = {
+ [WRITE_LIFE_LONG] = XFS_ZONE_ALLOC_OK,
+ [WRITE_LIFE_EXTREME] = XFS_ZONE_ALLOC_OK,
+ },
+ [WRITE_LIFE_EXTREME] = {
+ [WRITE_LIFE_LONG] = XFS_ZONE_ALLOC_OK,
+ [WRITE_LIFE_EXTREME] = XFS_ZONE_ALLOC_OK,
+ },
+};
static bool
xfs_try_use_zone(
struct xfs_zone_info *zi,
enum rw_hint file_hint,
struct xfs_open_zone *oz,
- bool lowspace)
+ unsigned int goodness)
{
if (oz->oz_allocated == rtg_blocks(oz->oz_rtg))
return false;
- if (!lowspace && !xfs_good_hint_match(oz, file_hint))
+
+ if (xfs_zoned_hint_score[oz->oz_write_hint][file_hint] < goodness)
return false;
+
if (!atomic_inc_not_zero(&oz->oz_ref))
return false;
@@ -581,14 +575,14 @@ static struct xfs_open_zone *
xfs_select_open_zone_lru(
struct xfs_zone_info *zi,
enum rw_hint file_hint,
- bool lowspace)
+ unsigned int goodness)
{
struct xfs_open_zone *oz;
lockdep_assert_held(&zi->zi_open_zones_lock);
list_for_each_entry(oz, &zi->zi_open_zones, oz_entry)
- if (xfs_try_use_zone(zi, file_hint, oz, lowspace))
+ if (xfs_try_use_zone(zi, file_hint, oz, goodness))
return oz;
cond_resched_lock(&zi->zi_open_zones_lock);
@@ -651,9 +645,11 @@ xfs_select_zone_nowait(
* data.
*/
spin_lock(&zi->zi_open_zones_lock);
- if (xfs_colocate_eagerly(write_hint))
- oz = xfs_select_open_zone_lru(zi, write_hint, false);
- else if (pack_tight)
+ oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_GOOD);
+ if (oz)
+ goto out_unlock;
+
+ if (pack_tight)
oz = xfs_select_open_zone_mru(zi, write_hint);
if (oz)
goto out_unlock;
@@ -667,16 +663,16 @@ xfs_select_zone_nowait(
goto out_unlock;
/*
- * Try to colocate cold data with other cold data if we failed to open a
- * new zone for it.
+ * Try to find a zone that is an ok match to colocate data with.
+ */
+ oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_OK);
+ if (oz)
+ goto out_unlock;
+
+ /*
+ * Pick the least recently used zone, regardless of hint match
*/
- if (write_hint != WRITE_LIFE_NOT_SET &&
- !xfs_colocate_eagerly(write_hint))
- oz = xfs_select_open_zone_lru(zi, write_hint, false);
- if (!oz)
- oz = xfs_select_open_zone_lru(zi, WRITE_LIFE_NOT_SET, false);
- if (!oz)
- oz = xfs_select_open_zone_lru(zi, WRITE_LIFE_NOT_SET, true);
+ oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_ANY);
out_unlock:
spin_unlock(&zi->zi_open_zones_lock);
return oz;
@@ -1135,7 +1131,7 @@ xfs_calc_open_zones(
if (bdev_open_zones)
mp->m_max_open_zones = bdev_open_zones;
else
- mp->m_max_open_zones = xfs_max_open_zones(mp);
+ mp->m_max_open_zones = XFS_DEFAULT_MAX_OPEN_ZONES;
}
if (mp->m_max_open_zones < XFS_MIN_OPEN_ZONES) {
@@ -1248,7 +1244,7 @@ xfs_mount_zones(
if (!mp->m_zone_info)
return -ENOMEM;
- xfs_info(mp, "%u zones of %u blocks size (%u max open)",
+ xfs_info(mp, "%u zones of %u blocks (%u max open zones)",
mp->m_sb.sb_rgcount, mp->m_groups[XG_TYPE_RTG].blocks,
mp->m_max_open_zones);
trace_xfs_zones_mount(mp);