diff options
| author | Carlos Maiolino <cem@kernel.org> | 2026-01-28 10:15:07 +0100 |
|---|---|---|
| committer | Carlos Maiolino <cem@kernel.org> | 2026-01-28 10:15:07 +0100 |
| commit | 2744d7adb2622864f2e4fb15cbc85aed3fbe1e8f (patch) | |
| tree | d654aa6e829f41187e8df6f9690b2a8a20127716 | |
| parent | 04a65666a69508fa0022c7343026c5a3d41d166d (diff) | |
| parent | bd3138e8912c9db182eac5fed1337645a98b7a4f (diff) | |
Merge tag 'attr-leaf-freemap-fixes-7.0_2026-01-25' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-7.0-merge
xfs: fix problems in the attr leaf freemap code [1/3]
Running generic/753 for hours revealed data corruption problems in the
attr leaf block space management code. Under certain circumstances,
freemap entries are left with zero size but a nonzero offset. If that
offset happens to be the same offset as the end of the entries array
during an attr set operation, the leaf entry table expansion will push
the freemap record offset upwards without checking for overlap with any
other freemap entries. If there happened to be a second freemap entry
overlapping with the newly allocated leaf entry space, then the next
attr set operation might find that space and overwrite the leaf entry,
thereby corrupting the leaf block.
Fix this by zeroing the freemap offset any time we set the size to zero.
If a subsequent attr set operation finds no space in the freemap, it
will compact the block and regenerate the freemaps.
With a bit of luck, this should all go splendidly.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
| -rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.c | 157 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_da_format.h | 2 | ||||
| -rw-r--r-- | fs/xfs/scrub/attr.c | 59 |
3 files changed, 155 insertions, 63 deletions
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 6061230b17ef..c3327b10709c 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -75,6 +75,59 @@ STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args, int move_count); STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); +/* Compute the byte offset of the end of the leaf entry array. */ +static inline int +xfs_attr_leaf_entries_end( + unsigned int hdrcount, + const struct xfs_attr_leafblock *leaf) +{ + return hdrcount * sizeof(struct xfs_attr_leaf_entry) + + xfs_attr3_leaf_hdr_size(leaf); +} + +static inline bool +ichdr_freemaps_overlap( + const struct xfs_attr3_icleaf_hdr *ichdr, + unsigned int x, + unsigned int y) +{ + const unsigned int xend = + ichdr->freemap[x].base + ichdr->freemap[x].size; + const unsigned int yend = + ichdr->freemap[y].base + ichdr->freemap[y].size; + + /* empty slots do not overlap */ + if (!ichdr->freemap[x].size || !ichdr->freemap[y].size) + return false; + + return ichdr->freemap[x].base < yend && xend > ichdr->freemap[y].base; +} + +static inline xfs_failaddr_t +xfs_attr_leaf_ichdr_freemaps_verify( + const struct xfs_attr3_icleaf_hdr *ichdr, + const struct xfs_attr_leafblock *leaf) +{ + unsigned int entries_end = + xfs_attr_leaf_entries_end(ichdr->count, leaf); + int i; + + if (ichdr_freemaps_overlap(ichdr, 0, 1)) + return __this_address; + if (ichdr_freemaps_overlap(ichdr, 0, 2)) + return __this_address; + if (ichdr_freemaps_overlap(ichdr, 1, 2)) + return __this_address; + + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + if (ichdr->freemap[i].size > 0 && + ichdr->freemap[i].base < entries_end) + return __this_address; + } + + return NULL; +} + /* * attr3 block 'firstused' conversion helpers. * @@ -218,6 +271,8 @@ xfs_attr3_leaf_hdr_to_disk( hdr3->freemap[i].base = cpu_to_be16(from->freemap[i].base); hdr3->freemap[i].size = cpu_to_be16(from->freemap[i].size); } + + ASSERT(xfs_attr_leaf_ichdr_freemaps_verify(from, to) == NULL); return; } to->hdr.info.forw = cpu_to_be32(from->forw); @@ -233,6 +288,8 @@ xfs_attr3_leaf_hdr_to_disk( to->hdr.freemap[i].base = cpu_to_be16(from->freemap[i].base); to->hdr.freemap[i].size = cpu_to_be16(from->freemap[i].size); } + + ASSERT(xfs_attr_leaf_ichdr_freemaps_verify(from, to) == NULL); } static xfs_failaddr_t @@ -385,6 +442,10 @@ xfs_attr3_leaf_verify( return __this_address; } + fa = xfs_attr_leaf_ichdr_freemaps_verify(&ichdr, leaf); + if (fa) + return fa; + return NULL; } @@ -1409,8 +1470,7 @@ xfs_attr3_leaf_add( * Search through freemap for first-fit on new name length. * (may need to figure in size of entry struct too) */ - tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf); + tablesize = xfs_attr_leaf_entries_end(ichdr.count + 1, leaf); for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) { if (tablesize > ichdr.firstused) { sum += ichdr.freemap[i].size; @@ -1476,6 +1536,7 @@ xfs_attr3_leaf_add_work( struct xfs_attr_leaf_name_local *name_loc; struct xfs_attr_leaf_name_remote *name_rmt; struct xfs_mount *mp; + int old_end, new_end; int tmp; int i; @@ -1568,17 +1629,48 @@ xfs_attr3_leaf_add_work( if (be16_to_cpu(entry->nameidx) < ichdr->firstused) ichdr->firstused = be16_to_cpu(entry->nameidx); - ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf)); - tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf); + new_end = xfs_attr_leaf_entries_end(ichdr->count, leaf); + old_end = new_end - sizeof(struct xfs_attr_leaf_entry); + + ASSERT(ichdr->firstused >= new_end); for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - if (ichdr->freemap[i].base == tmp) { - ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t); + int diff = 0; + + if (ichdr->freemap[i].base == old_end) { + /* + * This freemap entry starts at the old end of the + * leaf entry array, so we need to adjust its base + * upward to accomodate the larger array. + */ + diff = sizeof(struct xfs_attr_leaf_entry); + } else if (ichdr->freemap[i].size > 0 && + ichdr->freemap[i].base < new_end) { + /* + * This freemap entry starts in the space claimed by + * the new leaf entry. Adjust its base upward to + * reflect that. + */ + diff = new_end - ichdr->freemap[i].base; + } + + if (diff) { + ichdr->freemap[i].base += diff; ichdr->freemap[i].size -= - min_t(uint16_t, ichdr->freemap[i].size, - sizeof(xfs_attr_leaf_entry_t)); + min_t(uint16_t, ichdr->freemap[i].size, diff); + } + + /* + * Don't leave zero-length freemaps with nonzero base lying + * around, because we don't want the code in _remove that + * matches on base address to get confused and create + * overlapping freemaps. If we end up with no freemap entries + * then the next _add will compact the leaf block and + * regenerate the freemaps. + */ + if (ichdr->freemap[i].size == 0 && ichdr->freemap[i].base > 0) { + ichdr->freemap[i].base = 0; + ichdr->holes = 1; } } ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index); @@ -1623,6 +1715,10 @@ xfs_attr3_leaf_compact( ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src); ichdr_dst->freemap[0].size = ichdr_dst->firstused - ichdr_dst->freemap[0].base; + ichdr_dst->freemap[1].base = 0; + ichdr_dst->freemap[2].base = 0; + ichdr_dst->freemap[1].size = 0; + ichdr_dst->freemap[2].size = 0; /* write the header back to initialise the underlying buffer */ xfs_attr3_leaf_hdr_to_disk(args->geo, leaf_dst, ichdr_dst); @@ -1774,8 +1870,8 @@ xfs_attr3_leaf_rebalance( /* * leaf2 is the destination, compact it if it looks tight. */ - max = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1); - max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t); + max = ichdr2.firstused - + xfs_attr_leaf_entries_end(ichdr2.count, leaf1); if (space > max) xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp); @@ -1803,8 +1899,8 @@ xfs_attr3_leaf_rebalance( /* * leaf1 is the destination, compact it if it looks tight. */ - max = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1); - max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t); + max = ichdr1.firstused - + xfs_attr_leaf_entries_end(ichdr1.count, leaf1); if (space > max) xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp); @@ -2010,9 +2106,7 @@ xfs_attr3_leaf_toosmall( blk = &state->path.blk[ state->path.active-1 ]; leaf = blk->bp->b_addr; xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr, leaf); - bytes = xfs_attr3_leaf_hdr_size(leaf) + - ichdr.count * sizeof(xfs_attr_leaf_entry_t) + - ichdr.usedbytes; + bytes = xfs_attr_leaf_entries_end(ichdr.count, leaf) + ichdr.usedbytes; if (bytes > (state->args->geo->blksize >> 1)) { *action = 0; /* blk over 50%, don't try to join */ return 0; @@ -2070,9 +2164,8 @@ xfs_attr3_leaf_toosmall( bytes = state->args->geo->blksize - (state->args->geo->blksize >> 2) - ichdr.usedbytes - ichdr2.usedbytes - - ((ichdr.count + ichdr2.count) * - sizeof(xfs_attr_leaf_entry_t)) - - xfs_attr3_leaf_hdr_size(leaf); + xfs_attr_leaf_entries_end(ichdr.count + ichdr2.count, + leaf); xfs_trans_brelse(state->args->trans, bp); if (bytes >= 0) @@ -2134,8 +2227,7 @@ xfs_attr3_leaf_remove( ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8); ASSERT(args->index >= 0 && args->index < ichdr.count); - ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) + - xfs_attr3_leaf_hdr_size(leaf)); + ASSERT(ichdr.firstused >= xfs_attr_leaf_entries_end(ichdr.count, leaf)); entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; @@ -2148,8 +2240,7 @@ xfs_attr3_leaf_remove( * find smallest free region in case we need to replace it, * adjust any map that borders the entry table, */ - tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf); + tablesize = xfs_attr_leaf_entries_end(ichdr.count, leaf); tmp = ichdr.freemap[0].size; before = after = -1; smallest = XFS_ATTR_LEAF_MAPSIZE - 1; @@ -2256,8 +2347,7 @@ xfs_attr3_leaf_remove( * Check if leaf is less than 50% full, caller may want to * "join" the leaf with a sibling if so. */ - tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) + - ichdr.count * sizeof(xfs_attr_leaf_entry_t); + tmp = ichdr.usedbytes + xfs_attr_leaf_entries_end(ichdr.count, leaf); return tmp < args->geo->magicpct; /* leaf is < 37% full */ } @@ -2580,11 +2670,11 @@ xfs_attr3_leaf_moveents( ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC); ASSERT(ichdr_s->magic == ichdr_d->magic); ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8); - ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s)) - + xfs_attr3_leaf_hdr_size(leaf_s)); + ASSERT(ichdr_s->firstused >= + xfs_attr_leaf_entries_end(ichdr_s->count, leaf_s)); ASSERT(ichdr_d->count < args->geo->blksize / 8); - ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d)) - + xfs_attr3_leaf_hdr_size(leaf_d)); + ASSERT(ichdr_d->firstused >= + xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d)); ASSERT(start_s < ichdr_s->count); ASSERT(start_d <= ichdr_d->count); @@ -2644,8 +2734,7 @@ xfs_attr3_leaf_moveents( ichdr_d->usedbytes += tmp; ichdr_s->count -= 1; ichdr_d->count += 1; - tmp = ichdr_d->count * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf_d); + tmp = xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d); ASSERT(ichdr_d->firstused >= tmp); #ifdef GROT } @@ -2681,8 +2770,8 @@ xfs_attr3_leaf_moveents( /* * Fill in the freemap information */ - ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_d); - ichdr_d->freemap[0].base += ichdr_d->count * sizeof(xfs_attr_leaf_entry_t); + ichdr_d->freemap[0].base = + xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d); ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base; ichdr_d->freemap[1].base = 0; ichdr_d->freemap[2].base = 0; diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 86de99e2f757..7d55307e619f 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -746,7 +746,7 @@ struct xfs_attr3_leafblock { #define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t)) static inline int -xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp) +xfs_attr3_leaf_hdr_size(const struct xfs_attr_leafblock *leafp) { if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) return sizeof(struct xfs_attr3_leaf_hdr); diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index eeb5ac34d742..c3c122ea2d32 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -288,32 +288,6 @@ xchk_xattr_set_map( } /* - * Check the leaf freemap from the usage bitmap. Returns false if the - * attr freemap has problems or points to used space. - */ -STATIC bool -xchk_xattr_check_freemap( - struct xfs_scrub *sc, - struct xfs_attr3_icleaf_hdr *leafhdr) -{ - struct xchk_xattr_buf *ab = sc->buf; - unsigned int mapsize = sc->mp->m_attr_geo->blksize; - int i; - - /* Construct bitmap of freemap contents. */ - bitmap_zero(ab->freemap, mapsize); - for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - if (!xchk_xattr_set_map(sc, ab->freemap, - leafhdr->freemap[i].base, - leafhdr->freemap[i].size)) - return false; - } - - /* Look for bits that are set in freemap and are marked in use. */ - return !bitmap_intersects(ab->freemap, ab->usedmap, mapsize); -} - -/* * Check this leaf entry's relations to everything else. * Returns the number of bytes used for the name/value data. */ @@ -364,7 +338,10 @@ xchk_xattr_entry( rentry = xfs_attr3_leaf_name_remote(leaf, idx); namesize = xfs_attr_leaf_entsize_remote(rentry->namelen); name_end = (char *)rentry + namesize; - if (rentry->namelen == 0 || rentry->valueblk == 0) + if (rentry->namelen == 0) + xchk_da_set_corrupt(ds, level); + if (rentry->valueblk == 0 && + !(ent->flags & XFS_ATTR_INCOMPLETE)) xchk_da_set_corrupt(ds, level); } if (name_end > buf_end) @@ -403,6 +380,7 @@ xchk_xattr_block( *last_checked = blk->blkno; bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize); + bitmap_zero(ab->freemap, mp->m_attr_geo->blksize); /* Check all the padding. */ if (xfs_has_crc(ds->sc->mp)) { @@ -449,6 +427,9 @@ xchk_xattr_block( if ((char *)&entries[leafhdr.count] > (char *)leaf + leafhdr.firstused) xchk_da_set_corrupt(ds, level); + if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) { /* Mark the leaf entry itself. */ @@ -467,7 +448,29 @@ xchk_xattr_block( goto out; } - if (!xchk_xattr_check_freemap(ds->sc, &leafhdr)) + /* Construct bitmap of freemap contents. */ + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + if (!xchk_xattr_set_map(ds->sc, ab->freemap, + leafhdr.freemap[i].base, + leafhdr.freemap[i].size)) + xchk_da_set_corrupt(ds, level); + + /* + * freemap entries with zero length and nonzero base can cause + * problems with older kernels, so we mark these for preening + * even though there's no inconsistency. + */ + if (leafhdr.freemap[i].size == 0 && + leafhdr.freemap[i].base > 0) + xchk_da_set_preen(ds, level); + + if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + } + + /* Look for bits that are set in freemap and are marked in use. */ + if (bitmap_intersects(ab->freemap, ab->usedmap, + mp->m_attr_geo->blksize)) xchk_da_set_corrupt(ds, level); if (leafhdr.usedbytes != usedbytes) |
