summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/core-api/symbol-namespaces.rst11
-rw-r--r--drivers/tty/serial/8250/8250_rsa.c8
-rw-r--r--fs/anon_inodes.c2
-rw-r--r--fs/btrfs/extent_io.c24
-rw-r--r--fs/btrfs/inode.c29
-rw-r--r--fs/btrfs/subpage.c19
-rw-r--r--fs/btrfs/super.c13
-rw-r--r--fs/btrfs/zoned.c133
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/fhandle.c2
-rw-r--r--fs/fs-writeback.c9
-rw-r--r--fs/fuse/inode.c5
-rw-r--r--fs/iomap/direct-io.c14
-rw-r--r--fs/kernfs/inode.c4
-rw-r--r--fs/namespace.c76
-rw-r--r--fs/netfs/read_collect.c4
-rw-r--r--fs/netfs/write_collect.c10
-rw-r--r--fs/netfs/write_issue.c4
-rw-r--r--fs/overlayfs/dir.c2
-rw-r--r--fs/overlayfs/util.c3
-rw-r--r--fs/pidfs.c2
-rw-r--r--fs/pnode.c10
-rw-r--r--fs/splice.c3
-rw-r--r--include/linux/export.h2
-rw-r--r--include/linux/netfs.h1
-rw-r--r--kernel/signal.c6
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c3
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c77
29 files changed, 332 insertions, 148 deletions
diff --git a/Documentation/core-api/symbol-namespaces.rst b/Documentation/core-api/symbol-namespaces.rst
index 32fc73dc5529..034898e81ba2 100644
--- a/Documentation/core-api/symbol-namespaces.rst
+++ b/Documentation/core-api/symbol-namespaces.rst
@@ -76,20 +76,21 @@ unit as preprocessor statement. The above example would then read::
within the corresponding compilation unit before the #include for
<linux/export.h>. Typically it's placed before the first #include statement.
-Using the EXPORT_SYMBOL_GPL_FOR_MODULES() macro
------------------------------------------------
+Using the EXPORT_SYMBOL_FOR_MODULES() macro
+-------------------------------------------
Symbols exported using this macro are put into a module namespace. This
-namespace cannot be imported.
+namespace cannot be imported. These exports are GPL-only as they are only
+intended for in-tree modules.
The macro takes a comma separated list of module names, allowing only those
modules to access this symbol. Simple tail-globs are supported.
For example::
- EXPORT_SYMBOL_GPL_FOR_MODULES(preempt_notifier_inc, "kvm,kvm-*")
+ EXPORT_SYMBOL_FOR_MODULES(preempt_notifier_inc, "kvm,kvm-*")
-will limit usage of this symbol to modules whoes name matches the given
+will limit usage of this symbol to modules whose name matches the given
patterns.
How to use Symbols exported in Namespaces
diff --git a/drivers/tty/serial/8250/8250_rsa.c b/drivers/tty/serial/8250/8250_rsa.c
index d34093cc03ad..12a65b79583c 100644
--- a/drivers/tty/serial/8250/8250_rsa.c
+++ b/drivers/tty/serial/8250/8250_rsa.c
@@ -147,7 +147,7 @@ void rsa_enable(struct uart_8250_port *up)
if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16)
serial_out(up, UART_RSA_FRR, 0);
}
-EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_enable, "8250_base");
+EXPORT_SYMBOL_FOR_MODULES(rsa_enable, "8250_base");
/*
* Attempts to turn off the RSA FIFO and resets the RSA board back to 115kbps compat mode. It is
@@ -179,7 +179,7 @@ void rsa_disable(struct uart_8250_port *up)
up->port.uartclk = SERIAL_RSA_BAUD_BASE_LO * 16;
uart_port_unlock_irq(&up->port);
}
-EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_disable, "8250_base");
+EXPORT_SYMBOL_FOR_MODULES(rsa_disable, "8250_base");
void rsa_autoconfig(struct uart_8250_port *up)
{
@@ -192,7 +192,7 @@ void rsa_autoconfig(struct uart_8250_port *up)
if (__rsa_enable(up))
up->port.type = PORT_RSA;
}
-EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_autoconfig, "8250_base");
+EXPORT_SYMBOL_FOR_MODULES(rsa_autoconfig, "8250_base");
void rsa_reset(struct uart_8250_port *up)
{
@@ -201,7 +201,7 @@ void rsa_reset(struct uart_8250_port *up)
serial_out(up, UART_RSA_FRR, 0);
}
-EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_reset, "8250_base");
+EXPORT_SYMBOL_FOR_MODULES(rsa_reset, "8250_base");
#ifdef CONFIG_SERIAL_8250_DEPRECATED_OPTIONS
#ifndef MODULE
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 1d847a939f29..180a458fc4f7 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -129,7 +129,7 @@ struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *n
}
return inode;
}
-EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm");
+EXPORT_SYMBOL_FOR_MODULES(anon_inode_make_secure_inode, "kvm");
static struct file *__anon_inode_getfile(const char *name,
const struct file_operations *fops,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f23d75986947..c953297aa89a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1512,7 +1512,7 @@ out:
/*
* Return 0 if we have submitted or queued the sector for submission.
- * Return <0 for critical errors.
+ * Return <0 for critical errors, and the sector will have its dirty flag cleared.
*
* Caller should make sure filepos < i_size and handle filepos >= i_size case.
*/
@@ -1535,8 +1535,17 @@ static int submit_one_sector(struct btrfs_inode *inode,
ASSERT(filepos < i_size);
em = btrfs_get_extent(inode, NULL, filepos, sectorsize);
- if (IS_ERR(em))
+ if (IS_ERR(em)) {
+ /*
+ * When submission failed, we should still clear the folio dirty.
+ * Or the folio will be written back again but without any
+ * ordered extent.
+ */
+ btrfs_folio_clear_dirty(fs_info, folio, filepos, sectorsize);
+ btrfs_folio_set_writeback(fs_info, folio, filepos, sectorsize);
+ btrfs_folio_clear_writeback(fs_info, folio, filepos, sectorsize);
return PTR_ERR(em);
+ }
extent_offset = filepos - em->start;
em_end = btrfs_extent_map_end(em);
@@ -1609,8 +1618,12 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
folio_unlock(folio);
return 1;
}
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_folio_clear_dirty(fs_info, folio, start, len);
+ btrfs_folio_set_writeback(fs_info, folio, start, len);
+ btrfs_folio_clear_writeback(fs_info, folio, start, len);
return ret;
+ }
for (cur = start; cur < start + len; cur += fs_info->sectorsize)
set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap);
@@ -1666,8 +1679,8 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
* Here we set writeback and clear for the range. If the full folio
* is no longer dirty then we clear the PAGECACHE_TAG_DIRTY tag.
*
- * If we hit any error, the corresponding sector will still be dirty
- * thus no need to clear PAGECACHE_TAG_DIRTY.
+ * If we hit any error, the corresponding sector will have its dirty
+ * flag cleared and writeback finished, thus no need to handle the error case.
*/
if (!submitted_io && !error) {
btrfs_folio_set_writeback(fs_info, folio, start, len);
@@ -1813,6 +1826,7 @@ static noinline_for_stack bool lock_extent_buffer_for_io(struct extent_buffer *e
xas_load(&xas);
xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
+ xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
xas_unlock_irqrestore(&xas, flags);
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d740910e071a..9e4aec7330cb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4189,6 +4189,23 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
return ret;
}
+static void update_time_after_link_or_unlink(struct btrfs_inode *dir)
+{
+ struct timespec64 now;
+
+ /*
+ * If we are replaying a log tree, we do not want to update the mtime
+ * and ctime of the parent directory with the current time, since the
+ * log replay procedure is responsible for setting them to their correct
+ * values (the ones it had when the fsync was done).
+ */
+ if (test_bit(BTRFS_FS_LOG_RECOVERING, &dir->root->fs_info->flags))
+ return;
+
+ now = inode_set_ctime_current(&dir->vfs_inode);
+ inode_set_mtime_to_ts(&dir->vfs_inode, now);
+}
+
/*
* unlink helper that gets used here in inode.c and in the tree logging
* recovery code. It remove a link in a directory with a given name, and
@@ -4289,7 +4306,7 @@ skip_backref:
inode_inc_iversion(&inode->vfs_inode);
inode_set_ctime_current(&inode->vfs_inode);
inode_inc_iversion(&dir->vfs_inode);
- inode_set_mtime_to_ts(&dir->vfs_inode, inode_set_ctime_current(&dir->vfs_inode));
+ update_time_after_link_or_unlink(dir);
return btrfs_update_inode(trans, dir);
}
@@ -6683,15 +6700,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
name->len * 2);
inode_inc_iversion(&parent_inode->vfs_inode);
- /*
- * If we are replaying a log tree, we do not want to update the mtime
- * and ctime of the parent directory with the current time, since the
- * log replay procedure is responsible for setting them to their correct
- * values (the ones it had when the fsync was done).
- */
- if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags))
- inode_set_mtime_to_ts(&parent_inode->vfs_inode,
- inode_set_ctime_current(&parent_inode->vfs_inode));
+ update_time_after_link_or_unlink(parent_inode);
ret = btrfs_update_inode(trans, parent_inode);
if (ret)
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index c9b3821957f7..cb4f97833dc3 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -448,8 +448,25 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
spin_lock_irqsave(&bfs->lock, flags);
bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+
+ /*
+ * Don't clear the TOWRITE tag when starting writeback on a still-dirty
+ * folio. Doing so can cause WB_SYNC_ALL writepages() to overlook it,
+ * assume writeback is complete, and exit too early — violating sync
+ * ordering guarantees.
+ */
if (!folio_test_writeback(folio))
- folio_start_writeback(folio);
+ __folio_start_writeback(folio, true);
+ if (!folio_test_dirty(folio)) {
+ struct address_space *mapping = folio_mapping(folio);
+ XA_STATE(xas, &mapping->i_pages, folio->index);
+ unsigned long flags;
+
+ xas_lock_irqsave(&xas, flags);
+ xas_load(&xas);
+ xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
+ xas_unlock_irqrestore(&xas, flags);
+ }
spin_unlock_irqrestore(&bfs->lock, flags);
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 68e35a3700ff..a262b494a89f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -88,6 +88,9 @@ struct btrfs_fs_context {
refcount_t refs;
};
+static void btrfs_emit_options(struct btrfs_fs_info *info,
+ struct btrfs_fs_context *old);
+
enum {
Opt_acl,
Opt_clear_cache,
@@ -698,12 +701,9 @@ bool btrfs_check_options(const struct btrfs_fs_info *info,
if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) {
if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) {
- btrfs_info(info, "disk space caching is enabled");
btrfs_warn(info,
"space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2");
}
- if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE))
- btrfs_info(info, "using free-space-tree");
}
return ret;
@@ -980,6 +980,8 @@ static int btrfs_fill_super(struct super_block *sb,
return ret;
}
+ btrfs_emit_options(fs_info, NULL);
+
inode = btrfs_iget(BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
@@ -1437,7 +1439,7 @@ static void btrfs_emit_options(struct btrfs_fs_info *info,
{
btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum");
btrfs_info_if_set(info, old, DEGRADED, "allowing degraded mounts");
- btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum");
+ btrfs_info_if_set(info, old, NODATACOW, "setting nodatacow");
btrfs_info_if_set(info, old, SSD, "enabling ssd optimizations");
btrfs_info_if_set(info, old, SSD_SPREAD, "using spread ssd allocation scheme");
btrfs_info_if_set(info, old, NOBARRIER, "turning off barriers");
@@ -1459,10 +1461,11 @@ static void btrfs_emit_options(struct btrfs_fs_info *info,
btrfs_info_if_set(info, old, IGNOREMETACSUMS, "ignoring meta csums");
btrfs_info_if_set(info, old, IGNORESUPERFLAGS, "ignoring unknown super block flags");
+ btrfs_info_if_unset(info, old, NODATASUM, "setting datasum");
btrfs_info_if_unset(info, old, NODATACOW, "setting datacow");
btrfs_info_if_unset(info, old, SSD, "not using ssd optimizations");
btrfs_info_if_unset(info, old, SSD_SPREAD, "not using spread ssd allocation scheme");
- btrfs_info_if_unset(info, old, NOBARRIER, "turning off barriers");
+ btrfs_info_if_unset(info, old, NOBARRIER, "turning on barriers");
btrfs_info_if_unset(info, old, NOTREELOG, "enabling tree log");
btrfs_info_if_unset(info, old, SPACE_CACHE, "disabling disk space caching");
btrfs_info_if_unset(info, old, FREE_SPACE_TREE, "disabling free space tree");
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index db11b5b5f0e6..ea662036f441 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -17,6 +17,7 @@
#include "accessors.h"
#include "bio.h"
#include "transaction.h"
+#include "sysfs.h"
/* Maximum number of zones to report per blkdev_report_zones() call */
#define BTRFS_REPORT_NR_ZONES 4096
@@ -42,6 +43,9 @@
/* Number of superblock log zones */
#define BTRFS_NR_SB_LOG_ZONES 2
+/* Default number of max active zones when the device has no limits. */
+#define BTRFS_DEFAULT_MAX_ACTIVE_ZONES 128
+
/*
* Minimum of active zones we need:
*
@@ -416,7 +420,10 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++;
- max_active_zones = bdev_max_active_zones(bdev);
+ max_active_zones = min_not_zero(bdev_max_active_zones(bdev),
+ bdev_max_open_zones(bdev));
+ if (!max_active_zones && zone_info->nr_zones > BTRFS_DEFAULT_MAX_ACTIVE_ZONES)
+ max_active_zones = BTRFS_DEFAULT_MAX_ACTIVE_ZONES;
if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) {
btrfs_err(fs_info,
"zoned: %s: max active zones %u is too small, need at least %u active zones",
@@ -2168,10 +2175,15 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
goto out_unlock;
}
- /* No space left */
- if (btrfs_zoned_bg_is_full(block_group)) {
- ret = false;
- goto out_unlock;
+ if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) {
+ /* The caller should check if the block group is full. */
+ if (WARN_ON_ONCE(btrfs_zoned_bg_is_full(block_group))) {
+ ret = false;
+ goto out_unlock;
+ }
+ } else {
+ /* Since it is already written, it should have been active. */
+ WARN_ON_ONCE(block_group->meta_write_pointer != block_group->start);
}
for (i = 0; i < map->num_stripes; i++) {
@@ -2230,7 +2242,7 @@ static void wait_eb_writebacks(struct btrfs_block_group *block_group)
struct btrfs_fs_info *fs_info = block_group->fs_info;
const u64 end = block_group->start + block_group->length;
struct extent_buffer *eb;
- unsigned long index, start = (block_group->start >> fs_info->sectorsize_bits);
+ unsigned long index, start = (block_group->start >> fs_info->nodesize_bits);
rcu_read_lock();
xa_for_each_start(&fs_info->buffer_tree, index, eb, start) {
@@ -2245,6 +2257,40 @@ static void wait_eb_writebacks(struct btrfs_block_group *block_group)
rcu_read_unlock();
}
+static int call_zone_finish(struct btrfs_block_group *block_group,
+ struct btrfs_io_stripe *stripe)
+{
+ struct btrfs_device *device = stripe->dev;
+ const u64 physical = stripe->physical;
+ struct btrfs_zoned_device_info *zinfo = device->zone_info;
+ int ret;
+
+ if (!device->bdev)
+ return 0;
+
+ if (zinfo->max_active_zones == 0)
+ return 0;
+
+ if (btrfs_dev_is_sequential(device, physical)) {
+ unsigned int nofs_flags;
+
+ nofs_flags = memalloc_nofs_save();
+ ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+ physical >> SECTOR_SHIFT,
+ zinfo->zone_size >> SECTOR_SHIFT);
+ memalloc_nofs_restore(nofs_flags);
+
+ if (ret)
+ return ret;
+ }
+
+ if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
+ zinfo->reserved_active_zones++;
+ btrfs_dev_clear_active_zone(device, physical);
+
+ return 0;
+}
+
static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
@@ -2329,31 +2375,12 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
down_read(&dev_replace->rwsem);
map = block_group->physical_map;
for (i = 0; i < map->num_stripes; i++) {
- struct btrfs_device *device = map->stripes[i].dev;
- const u64 physical = map->stripes[i].physical;
- struct btrfs_zoned_device_info *zinfo = device->zone_info;
- unsigned int nofs_flags;
-
- if (!device->bdev)
- continue;
-
- if (zinfo->max_active_zones == 0)
- continue;
-
- nofs_flags = memalloc_nofs_save();
- ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
- physical >> SECTOR_SHIFT,
- zinfo->zone_size >> SECTOR_SHIFT);
- memalloc_nofs_restore(nofs_flags);
+ ret = call_zone_finish(block_group, &map->stripes[i]);
if (ret) {
up_read(&dev_replace->rwsem);
return ret;
}
-
- if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
- zinfo->reserved_active_zones++;
- btrfs_dev_clear_active_zone(device, physical);
}
up_read(&dev_replace->rwsem);
@@ -2504,12 +2531,12 @@ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
{
struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
- struct btrfs_space_info *space_info = data_sinfo->sub_group[0];
+ struct btrfs_space_info *space_info = data_sinfo;
struct btrfs_trans_handle *trans;
struct btrfs_block_group *bg;
struct list_head *bg_list;
u64 alloc_flags;
- bool initial = false;
+ bool first = true;
bool did_chunk_alloc = false;
int index;
int ret;
@@ -2523,21 +2550,52 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
if (sb_rdonly(fs_info->sb))
return;
- ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags);
index = btrfs_bg_flags_to_raid_index(alloc_flags);
- bg_list = &data_sinfo->block_groups[index];
+ /* Scan the data space_info to find empty block groups. Take the second one. */
again:
+ bg_list = &space_info->block_groups[index];
list_for_each_entry(bg, bg_list, list) {
- if (bg->used > 0)
+ if (bg->alloc_offset != 0)
continue;
- if (!initial) {
- initial = true;
+ if (first) {
+ first = false;
continue;
}
+ if (space_info == data_sinfo) {
+ /* Migrate the block group to the data relocation space_info. */
+ struct btrfs_space_info *reloc_sinfo = data_sinfo->sub_group[0];
+ int factor;
+
+ ASSERT(reloc_sinfo->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
+ factor = btrfs_bg_type_to_factor(bg->flags);
+
+ down_write(&space_info->groups_sem);
+ list_del_init(&bg->list);
+ /* We can assume this as we choose the second empty one. */
+ ASSERT(!list_empty(&space_info->block_groups[index]));
+ up_write(&space_info->groups_sem);
+
+ spin_lock(&space_info->lock);
+ space_info->total_bytes -= bg->length;
+ space_info->disk_total -= bg->length * factor;
+ /* There is no allocation ever happened. */
+ ASSERT(bg->used == 0);
+ ASSERT(bg->zone_unusable == 0);
+ /* No super block in a block group on the zoned setup. */
+ ASSERT(bg->bytes_super == 0);
+ spin_unlock(&space_info->lock);
+
+ bg->space_info = reloc_sinfo;
+ if (reloc_sinfo->block_group_kobjs[index] == NULL)
+ btrfs_sysfs_add_block_group_type(bg);
+
+ btrfs_add_bg_to_space_info(fs_info, bg);
+ }
+
fs_info->data_reloc_bg = bg->start;
set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags);
btrfs_zone_activate(bg);
@@ -2552,11 +2610,18 @@ again:
if (IS_ERR(trans))
return;
+ /* Allocate new BG in the data relocation space_info. */
+ space_info = data_sinfo->sub_group[0];
+ ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
btrfs_end_transaction(trans);
if (ret == 1) {
+ /*
+ * We allocated a new block group in the data relocation space_info. We
+ * can take that one.
+ */
+ first = false;
did_chunk_alloc = true;
- bg_list = &space_info->block_groups[index];
goto again;
}
}
diff --git a/fs/buffer.c b/fs/buffer.c
index ead4dc85debd..6a8752f7bbed 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -157,8 +157,8 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
*/
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
- __end_buffer_read_notouch(bh, uptodate);
put_bh(bh);
+ __end_buffer_read_notouch(bh, uptodate);
}
EXPORT_SYMBOL(end_buffer_read_sync);
diff --git a/fs/coredump.c b/fs/coredump.c
index fedbead956ed..5dce257c67fc 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -345,7 +345,7 @@ static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm,
was_space = false;
err = cn_printf(cn, "%c", '\0');
if (err)
- return err;
+ return false;
(*argv)[(*argc)++] = cn->used;
}
}
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 7c236f64cdea..68a7d2861c58 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -402,7 +402,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
if (retval)
return retval;
- CLASS(get_unused_fd, fd)(O_CLOEXEC);
+ CLASS(get_unused_fd, fd)(open_flag);
if (fd < 0)
return fd;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index cc57367fb641..a07b8cf73ae2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2608,10 +2608,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
wakeup_bdi = inode_io_list_move_locked(inode, wb,
dirty_list);
- spin_unlock(&wb->list_lock);
- spin_unlock(&inode->i_lock);
- trace_writeback_dirty_inode_enqueue(inode);
-
/*
* If this is the first dirty inode for this bdi,
* we have to wake-up the corresponding bdi thread
@@ -2621,6 +2617,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
if (wakeup_bdi &&
(wb->bdi->capabilities & BDI_CAP_WRITEBACK))
wb_wakeup_delayed(wb);
+
+ spin_unlock(&wb->list_lock);
+ spin_unlock(&inode->i_lock);
+ trace_writeback_dirty_inode_enqueue(inode);
+
return;
}
}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ecb869e895ab..67c2318bfc42 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -289,11 +289,6 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
}
}
- if (attr->blksize != 0)
- inode->i_blkbits = ilog2(attr->blksize);
- else
- inode->i_blkbits = inode->i_sb->s_blocksize_bits;
-
/*
* Don't set the sticky bit in i_mode, unless we want the VFS
* to check permissions. This prevents failures due to the
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 6f25d4cfea9f..b84f6af2eb4c 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -363,14 +363,14 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
if (iomap->flags & IOMAP_F_SHARED)
dio->flags |= IOMAP_DIO_COW;
- if (iomap->flags & IOMAP_F_NEW) {
+ if (iomap->flags & IOMAP_F_NEW)
need_zeroout = true;
- } else if (iomap->type == IOMAP_MAPPED) {
- if (iomap_dio_can_use_fua(iomap, dio))
- bio_opf |= REQ_FUA;
- else
- dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
- }
+ else if (iomap->type == IOMAP_MAPPED &&
+ iomap_dio_can_use_fua(iomap, dio))
+ bio_opf |= REQ_FUA;
+
+ if (!(bio_opf & REQ_FUA))
+ dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
/*
* We can only do deferred completion for pure overwrites that
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 3c293a5a21b1..457f91c412d4 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -142,9 +142,9 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
struct kernfs_node *kn = kernfs_dentry_node(dentry);
struct kernfs_iattrs *attrs;
- attrs = kernfs_iattrs_noalloc(kn);
+ attrs = kernfs_iattrs(kn);
if (!attrs)
- return -ENODATA;
+ return -ENOMEM;
return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size);
}
diff --git a/fs/namespace.c b/fs/namespace.c
index ddfd4457d338..ae6d1312b184 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1197,10 +1197,7 @@ static void commit_tree(struct mount *mnt)
if (!mnt_ns_attached(mnt)) {
for (struct mount *m = mnt; m; m = next_mnt(m, mnt))
- if (unlikely(mnt_ns_attached(m)))
- m = skip_mnt_tree(m);
- else
- mnt_add_to_ns(n, m);
+ mnt_add_to_ns(n, m);
n->nr_mounts += n->pending_mounts;
n->pending_mounts = 0;
}
@@ -2704,6 +2701,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
lock_mnt_tree(child);
q = __lookup_mnt(&child->mnt_parent->mnt,
child->mnt_mountpoint);
+ commit_tree(child);
if (q) {
struct mountpoint *mp = root.mp;
struct mount *r = child;
@@ -2713,7 +2711,6 @@ static int attach_recursive_mnt(struct mount *source_mnt,
mp = shorter;
mnt_change_mountpoint(r, mp, q);
}
- commit_tree(child);
}
unpin_mountpoint(&root);
unlock_mount_hash();
@@ -2862,6 +2859,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
return attach_recursive_mnt(mnt, p, mp);
}
+static int may_change_propagation(const struct mount *m)
+{
+ struct mnt_namespace *ns = m->mnt_ns;
+
+ // it must be mounted in some namespace
+ if (IS_ERR_OR_NULL(ns)) // is_mounted()
+ return -EINVAL;
+ // and the caller must be admin in userns of that namespace
+ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+ return 0;
+}
+
/*
* Sanity check the flags to change_mnt_propagation.
*/
@@ -2898,10 +2908,10 @@ static int do_change_type(struct path *path, int ms_flags)
return -EINVAL;
namespace_lock();
- if (!check_mnt(mnt)) {
- err = -EINVAL;
+ err = may_change_propagation(mnt);
+ if (err)
goto out_unlock;
- }
+
if (type == MS_SHARED) {
err = invent_group_ids(mnt, recurse);
if (err)
@@ -3347,18 +3357,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
namespace_lock();
- err = -EINVAL;
- /* To and From must be mounted */
- if (!is_mounted(&from->mnt))
- goto out;
- if (!is_mounted(&to->mnt))
- goto out;
-
- err = -EPERM;
- /* We should be allowed to modify mount namespaces of both mounts */
- if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ err = may_change_propagation(from);
+ if (err)
goto out;
- if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ err = may_change_propagation(to);
+ if (err)
goto out;
err = -EINVAL;
@@ -4551,20 +4554,10 @@ SYSCALL_DEFINE5(move_mount,
if (flags & MOVE_MOUNT_SET_GROUP) mflags |= MNT_TREE_PROPAGATION;
if (flags & MOVE_MOUNT_BENEATH) mflags |= MNT_TREE_BENEATH;
- lflags = 0;
- if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW;
- if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
uflags = 0;
- if (flags & MOVE_MOUNT_F_EMPTY_PATH) uflags = AT_EMPTY_PATH;
- from_name = getname_maybe_null(from_pathname, uflags);
- if (IS_ERR(from_name))
- return PTR_ERR(from_name);
+ if (flags & MOVE_MOUNT_T_EMPTY_PATH)
+ uflags = AT_EMPTY_PATH;
- lflags = 0;
- if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW;
- if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
- uflags = 0;
- if (flags & MOVE_MOUNT_T_EMPTY_PATH) uflags = AT_EMPTY_PATH;
to_name = getname_maybe_null(to_pathname, uflags);
if (IS_ERR(to_name))
return PTR_ERR(to_name);
@@ -4577,11 +4570,24 @@ SYSCALL_DEFINE5(move_mount,
to_path = fd_file(f_to)->f_path;
path_get(&to_path);
} else {
+ lflags = 0;
+ if (flags & MOVE_MOUNT_T_SYMLINKS)
+ lflags |= LOOKUP_FOLLOW;
+ if (flags & MOVE_MOUNT_T_AUTOMOUNTS)
+ lflags |= LOOKUP_AUTOMOUNT;
ret = filename_lookup(to_dfd, to_name, lflags, &to_path, NULL);
if (ret)
return ret;
}
+ uflags = 0;
+ if (flags & MOVE_MOUNT_F_EMPTY_PATH)
+ uflags = AT_EMPTY_PATH;
+
+ from_name = getname_maybe_null(from_pathname, uflags);
+ if (IS_ERR(from_name))
+ return PTR_ERR(from_name);
+
if (!from_name && from_dfd >= 0) {
CLASS(fd_raw, f_from)(from_dfd);
if (fd_empty(f_from))
@@ -4590,6 +4596,11 @@ SYSCALL_DEFINE5(move_mount,
return vfs_move_mount(&fd_file(f_from)->f_path, &to_path, mflags);
}
+ lflags = 0;
+ if (flags & MOVE_MOUNT_F_SYMLINKS)
+ lflags |= LOOKUP_FOLLOW;
+ if (flags & MOVE_MOUNT_F_AUTOMOUNTS)
+ lflags |= LOOKUP_AUTOMOUNT;
ret = filename_lookup(from_dfd, from_name, lflags, &from_path, NULL);
if (ret)
return ret;
@@ -5176,7 +5187,8 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
int ret;
struct mount_kattr kattr = {};
- kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE;
+ if (flags & OPEN_TREE_CLONE)
+ kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE;
if (flags & AT_RECURSIVE)
kattr.kflags |= MOUNT_KATTR_RECURSE;
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 3e804da1e1eb..a95e7aadafd0 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -281,8 +281,10 @@ reassess:
} else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) {
notes |= MADE_PROGRESS;
} else {
- if (!stream->failed)
+ if (!stream->failed) {
stream->transferred += transferred;
+ stream->transferred_valid = true;
+ }
if (front->transferred < front->len)
set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags);
notes |= MADE_PROGRESS;
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 0f3a36852a4d..cbf3d9194c7b 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -254,6 +254,7 @@ reassess_streams:
if (front->start + front->transferred > stream->collected_to) {
stream->collected_to = front->start + front->transferred;
stream->transferred = stream->collected_to - wreq->start;
+ stream->transferred_valid = true;
notes |= MADE_PROGRESS;
}
if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
@@ -356,6 +357,7 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
{
struct netfs_inode *ictx = netfs_inode(wreq->inode);
size_t transferred;
+ bool transferred_valid = false;
int s;
_enter("R=%x", wreq->debug_id);
@@ -376,12 +378,16 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
continue;
if (!list_empty(&stream->subrequests))
return false;
- if (stream->transferred < transferred)
+ if (stream->transferred_valid &&
+ stream->transferred < transferred) {
transferred = stream->transferred;
+ transferred_valid = true;
+ }
}
/* Okay, declare that all I/O is complete. */
- wreq->transferred = transferred;
+ if (transferred_valid)
+ wreq->transferred = transferred;
trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
if (wreq->io_streams[1].active &&
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 50bee2c4130d..0584cba1a043 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -118,12 +118,12 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
wreq->io_streams[0].prepare_write = ictx->ops->prepare_write;
wreq->io_streams[0].issue_write = ictx->ops->issue_write;
wreq->io_streams[0].collected_to = start;
- wreq->io_streams[0].transferred = LONG_MAX;
+ wreq->io_streams[0].transferred = 0;
wreq->io_streams[1].stream_nr = 1;
wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE;
wreq->io_streams[1].collected_to = start;
- wreq->io_streams[1].transferred = LONG_MAX;
+ wreq->io_streams[1].transferred = 0;
if (fscache_resources_valid(&wreq->cache_resources)) {
wreq->io_streams[1].avail = true;
wreq->io_streams[1].active = true;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 70b8687dc45e..dbd63a74df4b 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -225,7 +225,7 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
struct ovl_cattr *attr)
{
struct dentry *ret;
- inode_lock(workdir->d_inode);
+ inode_lock_nested(workdir->d_inode, I_MUTEX_PARENT);
ret = ovl_create_real(ofs, workdir,
ovl_lookup_temp(ofs, workdir), attr);
inode_unlock(workdir->d_inode);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index a33115e7384c..41033bac96cb 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -1552,7 +1552,8 @@ void ovl_copyattr(struct inode *inode)
int ovl_parent_lock(struct dentry *parent, struct dentry *child)
{
inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
- if (!child || child->d_parent == parent)
+ if (!child ||
+ (!d_unhashed(child) && child->d_parent == parent))
return 0;
inode_unlock(parent->d_inode);
diff --git a/fs/pidfs.c b/fs/pidfs.c
index edc35522d75c..108e7527f837 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -296,12 +296,12 @@ static __u32 pidfs_coredump_mask(unsigned long mm_flags)
static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
{
struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
+ struct task_struct *task __free(put_task) = NULL;
struct pid *pid = pidfd_pid(file);
size_t usize = _IOC_SIZE(cmd);
struct pidfd_info kinfo = {};
struct pidfs_exit_info *exit_info;
struct user_namespace *user_ns;
- struct task_struct *task;
struct pidfs_attr *attr;
const struct cred *c;
__u64 mask;
diff --git a/fs/pnode.c b/fs/pnode.c
index 81f7599bdac4..6f7d02f3fa98 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -111,7 +111,8 @@ void change_mnt_propagation(struct mount *mnt, int type)
return;
}
if (IS_MNT_SHARED(mnt)) {
- m = propagation_source(mnt);
+ if (type == MS_SLAVE || !hlist_empty(&mnt->mnt_slave_list))
+ m = propagation_source(mnt);
if (list_empty(&mnt->mnt_share)) {
mnt_release_group_id(mnt);
} else {
@@ -637,10 +638,11 @@ void propagate_umount(struct list_head *set)
}
// now to_umount consists of all acceptable candidates
- // deal with reparenting of remaining overmounts on those
+ // deal with reparenting of surviving overmounts on those
list_for_each_entry(m, &to_umount, mnt_list) {
- if (m->overmount)
- reparent(m->overmount);
+ struct mount *over = m->overmount;
+ if (over && !will_be_unmounted(over))
+ reparent(over);
}
// and fold them into the set
diff --git a/fs/splice.c b/fs/splice.c
index 4d6df083e0c0..f5094b6d00a0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -739,6 +739,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
sd.pos = kiocb.ki_pos;
if (ret <= 0)
break;
+ WARN_ONCE(ret > sd.total_len - left,
+ "Splice Exceeded! ret=%zd tot=%zu left=%zu\n",
+ ret, sd.total_len, left);
sd.num_spliced += ret;
sd.total_len -= ret;
diff --git a/include/linux/export.h b/include/linux/export.h
index f35d03b4113b..a686fd0ba406 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -91,6 +91,6 @@
#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", ns)
#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", ns)
-#define EXPORT_SYMBOL_GPL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods)
+#define EXPORT_SYMBOL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods)
#endif /* _LINUX_EXPORT_H */
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 185bd8196503..98c96d649bf9 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -150,6 +150,7 @@ struct netfs_io_stream {
bool active; /* T if stream is active */
bool need_retry; /* T if this stream needs retrying */
bool failed; /* T if this stream failed */
+ bool transferred_valid; /* T is ->transferred is valid */
};
/*
diff --git a/kernel/signal.c b/kernel/signal.c
index e2c928de7d2c..fe9190d84f28 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -4067,6 +4067,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
{
struct pid *pid;
enum pid_type type;
+ int ret;
/* Enforce flags be set to 0 until we add an extension. */
if (flags & ~PIDFD_SEND_SIGNAL_FLAGS)
@@ -4108,7 +4109,10 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
}
}
- return do_pidfd_send_signal(pid, sig, type, info, flags);
+ ret = do_pidfd_send_signal(pid, sig, type, info, flags);
+ put_pid(pid);
+
+ return ret;
}
static int
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
index 5a5a7a5f7e1d..a4ac80bb1003 100644
--- a/tools/testing/selftests/coredump/stackdump_test.c
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -446,9 +446,6 @@ TEST_F(coredump, socket_detect_userspace_client)
if (info.coredump_mask & PIDFD_COREDUMPED)
goto out;
- if (read(fd_coredump, &c, 1) < 1)
- goto out;
-
exit_code = EXIT_SUCCESS;
out:
if (fd_peer_pidfd >= 0)
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index b1e4618399be..a688871a98eb 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -107,6 +107,26 @@
#endif
#endif
+#ifndef __NR_open_tree_attr
+ #if defined __alpha__
+ #define __NR_open_tree_attr 577
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_open_tree_attr (467 + 4000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_open_tree_attr (467 + 6000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_open_tree_attr (467 + 5000)
+ #endif
+ #elif defined __ia64__
+ #define __NR_open_tree_attr (467 + 1024)
+ #else
+ #define __NR_open_tree_attr 467
+ #endif
+#endif
+
#ifndef MOUNT_ATTR_IDMAP
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
@@ -121,6 +141,12 @@ static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flag
return syscall(__NR_mount_setattr, dfd, path, flags, attr, size);
}
+static inline int sys_open_tree_attr(int dfd, const char *path, unsigned int flags,
+ struct mount_attr *attr, size_t size)
+{
+ return syscall(__NR_open_tree_attr, dfd, path, flags, attr, size);
+}
+
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
ssize_t ret;
@@ -1222,6 +1248,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
attr.userns_fd = get_userns_fd(0, 10000, 10000);
ASSERT_GE(attr.userns_fd, 0);
ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ /*
+ * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
+ * to bypass this mount_setattr() restriction.
+ */
+ ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+
ASSERT_EQ(close(attr.userns_fd), 0);
ASSERT_EQ(close(open_tree_fd), 0);
}
@@ -1255,6 +1287,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
ASSERT_GE(attr.userns_fd, 0);
ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
sizeof(attr)), 0);
+ /*
+ * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
+ * to bypass this mount_setattr() restriction.
+ */
+ ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+
ASSERT_EQ(close(attr.userns_fd), 0);
ASSERT_EQ(close(open_tree_fd), 0);
}
@@ -1321,6 +1359,19 @@ TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
ASSERT_EQ(close(open_tree_fd), 0);
}
+static bool expected_uid_gid(int dfd, const char *path, int flags,
+ uid_t expected_uid, gid_t expected_gid)
+{
+ int ret;
+ struct stat st;
+
+ ret = fstatat(dfd, path, &st, flags);
+ if (ret < 0)
+ return false;
+
+ return st.st_uid == expected_uid && st.st_gid == expected_gid;
+}
+
/**
* Validate that currently changing the idmapping of an idmapped mount fails.
*/
@@ -1331,6 +1382,8 @@ TEST_F(mount_setattr_idmapped, change_idmapping)
.attr_set = MOUNT_ATTR_IDMAP,
};
+ ASSERT_TRUE(expected_uid_gid(-EBADF, "/mnt/D", 0, 0, 0));
+
if (!mount_setattr_supported())
SKIP(return, "mount_setattr syscall not supported");
@@ -1348,27 +1401,25 @@ TEST_F(mount_setattr_idmapped, change_idmapping)
AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
ASSERT_EQ(close(attr.userns_fd), 0);
+ EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 0, 0));
+ EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000));
+
/* Change idmapping on a detached mount that is already idmapped. */
attr.userns_fd = get_userns_fd(0, 20000, 10000);
ASSERT_GE(attr.userns_fd, 0);
ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ /*
+ * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
+ * to bypass this mount_setattr() restriction.
+ */
+ EXPECT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 20000, 20000));
+ EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000));
+
ASSERT_EQ(close(attr.userns_fd), 0);
ASSERT_EQ(close(open_tree_fd), 0);
}
-static bool expected_uid_gid(int dfd, const char *path, int flags,
- uid_t expected_uid, gid_t expected_gid)
-{
- int ret;
- struct stat st;
-
- ret = fstatat(dfd, path, &st, flags);
- if (ret < 0)
- return false;
-
- return st.st_uid == expected_uid && st.st_gid == expected_gid;
-}
-
TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
{
int open_tree_fd = -EBADF;