summaryrefslogtreecommitdiff
path: root/fs/namespace.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--fs/namespace.c394
1 files changed, 185 insertions, 209 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index ddfd4457d338..dc01b14c58cd 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -33,6 +33,7 @@
#include <linux/shmem_fs.h>
#include <linux/mnt_idmapping.h>
#include <linux/pidfs.h>
+#include <linux/nstree.h>
#include "pnode.h"
#include "internal.h"
@@ -65,6 +66,15 @@ static int __init set_mphash_entries(char *str)
}
__setup("mphash_entries=", set_mphash_entries);
+static char * __initdata initramfs_options;
+static int __init initramfs_options_setup(char *str)
+{
+ initramfs_options = str;
+ return 1;
+}
+
+__setup("initramfs_options=", initramfs_options_setup);
+
static u64 event;
static DEFINE_XARRAY_FLAGS(mnt_id_xa, XA_FLAGS_ALLOC);
static DEFINE_IDA(mnt_group_ida);
@@ -80,13 +90,10 @@ static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
static struct mnt_namespace *emptied_ns; /* protected by namespace_sem */
-static DEFINE_SEQLOCK(mnt_ns_tree_lock);
#ifdef CONFIG_FSNOTIFY
LIST_HEAD(notify_list); /* protected by namespace_sem */
#endif
-static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
-static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */
enum mount_kattr_flags_t {
MOUNT_KATTR_RECURSE = (1 << 0),
@@ -119,59 +126,18 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
{
+ struct ns_common *ns;
+
if (!node)
return NULL;
- return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node);
-}
-
-static int mnt_ns_cmp(struct rb_node *a, const struct rb_node *b)
-{
- struct mnt_namespace *ns_a = node_to_mnt_ns(a);
- struct mnt_namespace *ns_b = node_to_mnt_ns(b);
- u64 seq_a = ns_a->seq;
- u64 seq_b = ns_b->seq;
-
- if (seq_a < seq_b)
- return -1;
- if (seq_a > seq_b)
- return 1;
- return 0;
-}
-
-static inline void mnt_ns_tree_write_lock(void)
-{
- write_seqlock(&mnt_ns_tree_lock);
-}
-
-static inline void mnt_ns_tree_write_unlock(void)
-{
- write_sequnlock(&mnt_ns_tree_lock);
-}
-
-static void mnt_ns_tree_add(struct mnt_namespace *ns)
-{
- struct rb_node *node, *prev;
-
- mnt_ns_tree_write_lock();
- node = rb_find_add_rcu(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_cmp);
- /*
- * If there's no previous entry simply add it after the
- * head and if there is add it after the previous entry.
- */
- prev = rb_prev(&ns->mnt_ns_tree_node);
- if (!prev)
- list_add_rcu(&ns->mnt_ns_list, &mnt_ns_list);
- else
- list_add_rcu(&ns->mnt_ns_list, &node_to_mnt_ns(prev)->mnt_ns_list);
- mnt_ns_tree_write_unlock();
-
- WARN_ON_ONCE(node);
+ ns = rb_entry(node, struct ns_common, ns_tree_node);
+ return container_of(ns, struct mnt_namespace, ns);
}
static void mnt_ns_release(struct mnt_namespace *ns)
{
/* keep alive for {list,stat}mount() */
- if (refcount_dec_and_test(&ns->passive)) {
+ if (ns && refcount_dec_and_test(&ns->passive)) {
fsnotify_mntns_delete(ns);
put_user_ns(ns->user_ns);
kfree(ns);
@@ -181,32 +147,16 @@ DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))
static void mnt_ns_release_rcu(struct rcu_head *rcu)
{
- mnt_ns_release(container_of(rcu, struct mnt_namespace, mnt_ns_rcu));
+ mnt_ns_release(container_of(rcu, struct mnt_namespace, ns.ns_rcu));
}
static void mnt_ns_tree_remove(struct mnt_namespace *ns)
{
/* remove from global mount namespace list */
- if (!is_anon_ns(ns)) {
- mnt_ns_tree_write_lock();
- rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
- list_bidir_del_rcu(&ns->mnt_ns_list);
- mnt_ns_tree_write_unlock();
- }
-
- call_rcu(&ns->mnt_ns_rcu, mnt_ns_release_rcu);
-}
+ if (ns_tree_active(ns))
+ ns_tree_remove(ns);
-static int mnt_ns_find(const void *key, const struct rb_node *node)
-{
- const u64 mnt_ns_id = *(u64 *)key;
- const struct mnt_namespace *ns = node_to_mnt_ns(node);
-
- if (mnt_ns_id < ns->seq)
- return -1;
- if (mnt_ns_id > ns->seq)
- return 1;
- return 0;
+ call_rcu(&ns->ns.ns_rcu, mnt_ns_release_rcu);
}
/*
@@ -225,28 +175,21 @@ static int mnt_ns_find(const void *key, const struct rb_node *node)
*/
static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
{
- struct mnt_namespace *ns;
- struct rb_node *node;
- unsigned int seq;
+ struct mnt_namespace *mnt_ns;
+ struct ns_common *ns;
guard(rcu)();
- do {
- seq = read_seqbegin(&mnt_ns_tree_lock);
- node = rb_find_rcu(&mnt_ns_id, &mnt_ns_tree, mnt_ns_find);
- if (node)
- break;
- } while (read_seqretry(&mnt_ns_tree_lock, seq));
-
- if (!node)
+ ns = ns_tree_lookup_rcu(mnt_ns_id, CLONE_NEWNS);
+ if (!ns)
return NULL;
/*
* The last reference count is put with RCU delay so we can
* unconditonally acquire a reference here.
*/
- ns = node_to_mnt_ns(node);
- refcount_inc(&ns->passive);
- return ns;
+ mnt_ns = container_of(ns, struct mnt_namespace, ns);
+ refcount_inc(&mnt_ns->passive);
+ return mnt_ns;
}
static inline void lock_mount_hash(void)
@@ -1017,7 +960,7 @@ static inline bool check_anonymous_mnt(struct mount *mnt)
return false;
seq = mnt->mnt_ns->seq_origin;
- return !seq || (seq == current->nsproxy->mnt_ns->seq);
+ return !seq || (seq == current->nsproxy->mnt_ns->ns.ns_id);
}
/*
@@ -1197,10 +1140,7 @@ static void commit_tree(struct mount *mnt)
if (!mnt_ns_attached(mnt)) {
for (struct mount *m = mnt; m; m = next_mnt(m, mnt))
- if (unlikely(mnt_ns_attached(m)))
- m = skip_mnt_tree(m);
- else
- mnt_add_to_ns(n, m);
+ mnt_add_to_ns(n, m);
n->nr_mounts += n->pending_mounts;
n->pending_mounts = 0;
}
@@ -2155,19 +2095,16 @@ struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool previous)
{
+ struct ns_common *ns;
+
guard(rcu)();
for (;;) {
- struct list_head *list;
-
- if (previous)
- list = rcu_dereference(list_bidir_prev_rcu(&mntns->mnt_ns_list));
- else
- list = rcu_dereference(list_next_rcu(&mntns->mnt_ns_list));
- if (list_is_head(list, &mnt_ns_list))
- return ERR_PTR(-ENOENT);
+ ns = ns_tree_adjoined_rcu(mntns, previous);
+ if (IS_ERR(ns))
+ return ERR_CAST(ns);
- mntns = list_entry_rcu(list, struct mnt_namespace, mnt_ns_list);
+ mntns = to_mnt_ns(ns);
/*
* The last passive reference count is put with RCU
@@ -2182,7 +2119,7 @@ struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool pr
* the mount namespace and it might already be on its
* deathbed.
*/
- if (!refcount_inc_not_zero(&mntns->ns.count))
+ if (!ns_ref_get(mntns))
continue;
return mntns;
@@ -2207,7 +2144,7 @@ static bool mnt_ns_loop(struct dentry *dentry)
if (!mnt_ns)
return false;
- return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
+ return current->nsproxy->mnt_ns->ns.ns_id >= mnt_ns->ns.ns_id;
}
struct mount *copy_tree(struct mount *src_root, struct dentry *dentry,
@@ -2458,7 +2395,7 @@ struct vfsmount *clone_private_mount(const struct path *path)
return ERR_PTR(-EINVAL);
}
- if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
if (__has_locked_children(old_mnt, path->dentry))
@@ -2704,6 +2641,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
lock_mnt_tree(child);
q = __lookup_mnt(&child->mnt_parent->mnt,
child->mnt_mountpoint);
+ commit_tree(child);
if (q) {
struct mountpoint *mp = root.mp;
struct mount *r = child;
@@ -2713,7 +2651,6 @@ static int attach_recursive_mnt(struct mount *source_mnt,
mp = shorter;
mnt_change_mountpoint(r, mp, q);
}
- commit_tree(child);
}
unpin_mountpoint(&root);
unlock_mount_hash();
@@ -2862,6 +2799,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
return attach_recursive_mnt(mnt, p, mp);
}
+static int may_change_propagation(const struct mount *m)
+{
+ struct mnt_namespace *ns = m->mnt_ns;
+
+ // it must be mounted in some namespace
+ if (IS_ERR_OR_NULL(ns)) // is_mounted()
+ return -EINVAL;
+ // and the caller must be admin in userns of that namespace
+ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+ return 0;
+}
+
/*
* Sanity check the flags to change_mnt_propagation.
*/
@@ -2898,10 +2848,10 @@ static int do_change_type(struct path *path, int ms_flags)
return -EINVAL;
namespace_lock();
- if (!check_mnt(mnt)) {
- err = -EINVAL;
+ err = may_change_propagation(mnt);
+ if (err)
goto out_unlock;
- }
+
if (type == MS_SHARED) {
err = invent_group_ids(mnt, recurse);
if (err)
@@ -3070,7 +3020,7 @@ static struct file *open_detached_copy(struct path *path, bool recursive)
if (is_anon_ns(src_mnt_ns))
ns->seq_origin = src_mnt_ns->seq_origin;
else
- ns->seq_origin = src_mnt_ns->seq;
+ ns->seq_origin = src_mnt_ns->ns.ns_id;
}
mnt = __do_loopback(path, recursive);
@@ -3279,7 +3229,7 @@ static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
* If you've mounted a non-root directory somewhere and want to do remount
* on it - tough luck.
*/
-static int do_remount(struct path *path, int ms_flags, int sb_flags,
+static int do_remount(struct path *path, int sb_flags,
int mnt_flags, void *data)
{
int err;
@@ -3347,18 +3297,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
namespace_lock();
- err = -EINVAL;
- /* To and From must be mounted */
- if (!is_mounted(&from->mnt))
- goto out;
- if (!is_mounted(&to->mnt))
- goto out;
-
- err = -EPERM;
- /* We should be allowed to modify mount namespaces of both mounts */
- if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ err = may_change_propagation(from);
+ if (err)
goto out;
- if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ err = may_change_propagation(to);
+ if (err)
goto out;
err = -EINVAL;
@@ -3724,8 +3667,10 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
int error;
error = security_sb_kern_mount(sb);
- if (!error && mount_too_revealing(sb, &mnt_flags))
+ if (!error && mount_too_revealing(sb, &mnt_flags)) {
+ errorfcp(fc, "VFS", "Mount too revealing");
error = -EPERM;
+ }
if (unlikely(error)) {
fc_drop_locked(fc);
@@ -4109,7 +4054,7 @@ int path_mount(const char *dev_name, struct path *path,
if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
return do_reconfigure_mnt(path, mnt_flags);
if (flags & MS_REMOUNT)
- return do_remount(path, flags, sb_flags, mnt_flags, data_page);
+ return do_remount(path, sb_flags, mnt_flags, data_page);
if (flags & MS_BIND)
return do_loopback(path, dev_name, flags & MS_REC);
if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
@@ -4148,20 +4093,11 @@ static void dec_mnt_namespaces(struct ucounts *ucounts)
static void free_mnt_ns(struct mnt_namespace *ns)
{
if (!is_anon_ns(ns))
- ns_free_inum(&ns->ns);
+ ns_common_free(ns);
dec_mnt_namespaces(ns->ucounts);
mnt_ns_tree_remove(ns);
}
-/*
- * Assign a sequence number so we can detect when we attempt to bind
- * mount a reference to an older mount namespace into the current
- * mount namespace, preventing reference counting loops. A 64bit
- * number incrementing at 10Ghz will take 12,427 years to wrap which
- * is effectively never, so we can ignore the possibility.
- */
-static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
-
static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
{
struct mnt_namespace *new_ns;
@@ -4177,22 +4113,20 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
dec_mnt_namespaces(ucounts);
return ERR_PTR(-ENOMEM);
}
- if (!anon) {
- ret = ns_alloc_inum(&new_ns->ns);
- if (ret) {
- kfree(new_ns);
- dec_mnt_namespaces(ucounts);
- return ERR_PTR(ret);
- }
+
+ if (anon)
+ ret = ns_common_init_inum(new_ns, MNT_NS_ANON_INO);
+ else
+ ret = ns_common_init(new_ns);
+ if (ret) {
+ kfree(new_ns);
+ dec_mnt_namespaces(ucounts);
+ return ERR_PTR(ret);
}
- new_ns->ns.ops = &mntns_operations;
if (!anon)
- new_ns->seq = atomic64_inc_return(&mnt_ns_seq);
- refcount_set(&new_ns->ns.count, 1);
+ ns_tree_gen_id(&new_ns->ns);
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
- INIT_LIST_HEAD(&new_ns->mnt_ns_list);
- RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
init_waitqueue_head(&new_ns->poll);
new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts;
@@ -4200,7 +4134,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
}
__latent_entropy
-struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
+struct mnt_namespace *copy_mnt_ns(u64 flags, struct mnt_namespace *ns,
struct user_namespace *user_ns, struct fs_struct *new_fs)
{
struct mnt_namespace *new_ns;
@@ -4231,7 +4165,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
namespace_unlock();
- ns_free_inum(&new_ns->ns);
+ ns_common_free(ns);
dec_mnt_namespaces(new_ns->ucounts);
mnt_ns_release(new_ns);
return ERR_CAST(new);
@@ -4278,7 +4212,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
if (pwdmnt)
mntput(pwdmnt);
- mnt_ns_tree_add(new_ns);
+ ns_tree_add_raw(new_ns);
return new_ns;
}
@@ -4441,7 +4375,7 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
ret = -EPERM;
if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
- pr_warn("VFS: Mount too revealing\n");
+ errorfcp(fc, "VFS", "Mount too revealing");
goto err_unlock;
}
@@ -4551,20 +4485,10 @@ SYSCALL_DEFINE5(move_mount,
if (flags & MOVE_MOUNT_SET_GROUP) mflags |= MNT_TREE_PROPAGATION;
if (flags & MOVE_MOUNT_BENEATH) mflags |= MNT_TREE_BENEATH;
- lflags = 0;
- if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW;
- if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
uflags = 0;
- if (flags & MOVE_MOUNT_F_EMPTY_PATH) uflags = AT_EMPTY_PATH;
- from_name = getname_maybe_null(from_pathname, uflags);
- if (IS_ERR(from_name))
- return PTR_ERR(from_name);
+ if (flags & MOVE_MOUNT_T_EMPTY_PATH)
+ uflags = AT_EMPTY_PATH;
- lflags = 0;
- if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW;
- if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
- uflags = 0;
- if (flags & MOVE_MOUNT_T_EMPTY_PATH) uflags = AT_EMPTY_PATH;
to_name = getname_maybe_null(to_pathname, uflags);
if (IS_ERR(to_name))
return PTR_ERR(to_name);
@@ -4577,11 +4501,24 @@ SYSCALL_DEFINE5(move_mount,
to_path = fd_file(f_to)->f_path;
path_get(&to_path);
} else {
+ lflags = 0;
+ if (flags & MOVE_MOUNT_T_SYMLINKS)
+ lflags |= LOOKUP_FOLLOW;
+ if (flags & MOVE_MOUNT_T_AUTOMOUNTS)
+ lflags |= LOOKUP_AUTOMOUNT;
ret = filename_lookup(to_dfd, to_name, lflags, &to_path, NULL);
if (ret)
return ret;
}
+ uflags = 0;
+ if (flags & MOVE_MOUNT_F_EMPTY_PATH)
+ uflags = AT_EMPTY_PATH;
+
+ from_name = getname_maybe_null(from_pathname, uflags);
+ if (IS_ERR(from_name))
+ return PTR_ERR(from_name);
+
if (!from_name && from_dfd >= 0) {
CLASS(fd_raw, f_from)(from_dfd);
if (fd_empty(f_from))
@@ -4590,6 +4527,11 @@ SYSCALL_DEFINE5(move_mount,
return vfs_move_mount(&fd_file(f_from)->f_path, &to_path, mflags);
}
+ lflags = 0;
+ if (flags & MOVE_MOUNT_F_SYMLINKS)
+ lflags |= LOOKUP_FOLLOW;
+ if (flags & MOVE_MOUNT_F_AUTOMOUNTS)
+ lflags |= LOOKUP_AUTOMOUNT;
ret = filename_lookup(from_dfd, from_name, lflags, &from_path, NULL);
if (ret)
return ret;
@@ -4996,7 +4938,7 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
return -EINVAL;
ns = get_proc_ns(file_inode(fd_file(f)));
- if (ns->ops->type != CLONE_NEWUSER)
+ if (ns->ns_type != CLONE_NEWUSER)
return -EINVAL;
/*
@@ -5176,7 +5118,8 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
int ret;
struct mount_kattr kattr = {};
- kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE;
+ if (flags & OPEN_TREE_CLONE)
+ kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE;
if (flags & AT_RECURSIVE)
kattr.kflags |= MOUNT_KATTR_RECURSE;
@@ -5388,7 +5331,7 @@ static int statmount_sb_source(struct kstatmount *s, struct seq_file *seq)
static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns)
{
s->sm.mask |= STATMOUNT_MNT_NS_ID;
- s->sm.mnt_ns_id = ns->seq;
+ s->sm.mnt_ns_id = ns->ns.ns_id;
}
static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
@@ -5699,7 +5642,6 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
struct mnt_namespace *ns)
{
- struct path root __free(path_put) = {};
struct mount *m;
int err;
@@ -5711,7 +5653,7 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
if (!s->mnt)
return -ENOENT;
- err = grab_requested_root(ns, &root);
+ err = grab_requested_root(ns, &s->root);
if (err)
return err;
@@ -5720,7 +5662,7 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
* mounts to show users.
*/
m = real_mount(s->mnt);
- if (!is_path_reachable(m, m->mnt.mnt_root, &root) &&
+ if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
@@ -5728,8 +5670,6 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
if (err)
return err;
- s->root = root;
-
/*
* Note that mount properties in mnt->mnt_flags, mnt->mnt_idmap
* can change concurrently as we only hold the read-side of the
@@ -5898,7 +5838,7 @@ static struct mnt_namespace *grab_requested_mnt_ns(const struct mnt_id_req *kreq
return ERR_PTR(-EINVAL);
ns = get_proc_ns(file_inode(fd_file(f)));
- if (ns->ops->type != CLONE_NEWNS)
+ if (ns->ns_type != CLONE_NEWNS)
return ERR_PTR(-EINVAL);
mnt_ns = to_mnt_ns(ns);
@@ -5951,28 +5891,40 @@ retry:
if (!ret)
ret = copy_statmount_to_user(ks);
kvfree(ks->seq.buf);
+ path_put(&ks->root);
if (retry_statmount(ret, &seq_size))
goto retry;
return ret;
}
-static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
- u64 last_mnt_id, u64 *mnt_ids, size_t nr_mnt_ids,
- bool reverse)
+struct klistmount {
+ u64 last_mnt_id;
+ u64 mnt_parent_id;
+ u64 *kmnt_ids;
+ u32 nr_mnt_ids;
+ struct mnt_namespace *ns;
+ struct path root;
+};
+
+static ssize_t do_listmount(struct klistmount *kls, bool reverse)
{
- struct path root __free(path_put) = {};
+ struct mnt_namespace *ns = kls->ns;
+ u64 mnt_parent_id = kls->mnt_parent_id;
+ u64 last_mnt_id = kls->last_mnt_id;
+ u64 *mnt_ids = kls->kmnt_ids;
+ size_t nr_mnt_ids = kls->nr_mnt_ids;
struct path orig;
struct mount *r, *first;
ssize_t ret;
rwsem_assert_held(&namespace_sem);
- ret = grab_requested_root(ns, &root);
+ ret = grab_requested_root(ns, &kls->root);
if (ret)
return ret;
if (mnt_parent_id == LSMT_ROOT) {
- orig = root;
+ orig = kls->root;
} else {
orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns);
if (!orig.mnt)
@@ -5984,7 +5936,7 @@ static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
* Don't trigger audit denials. We just want to determine what
* mounts to show users.
*/
- if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &root) &&
+ if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &kls->root) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
@@ -6017,14 +5969,45 @@ static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
return ret;
}
+static void __free_klistmount_free(const struct klistmount *kls)
+{
+ path_put(&kls->root);
+ kvfree(kls->kmnt_ids);
+ mnt_ns_release(kls->ns);
+}
+
+static inline int prepare_klistmount(struct klistmount *kls, struct mnt_id_req *kreq,
+ size_t nr_mnt_ids)
+{
+
+ u64 last_mnt_id = kreq->param;
+
+ /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
+ if (last_mnt_id != 0 && last_mnt_id <= MNT_UNIQUE_ID_OFFSET)
+ return -EINVAL;
+
+ kls->last_mnt_id = last_mnt_id;
+
+ kls->nr_mnt_ids = nr_mnt_ids;
+ kls->kmnt_ids = kvmalloc_array(nr_mnt_ids, sizeof(*kls->kmnt_ids),
+ GFP_KERNEL_ACCOUNT);
+ if (!kls->kmnt_ids)
+ return -ENOMEM;
+
+ kls->ns = grab_requested_mnt_ns(kreq);
+ if (!kls->ns)
+ return -ENOENT;
+
+ kls->mnt_parent_id = kreq->mnt_id;
+ return 0;
+}
+
SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
u64 __user *, mnt_ids, size_t, nr_mnt_ids, unsigned int, flags)
{
- u64 *kmnt_ids __free(kvfree) = NULL;
+ struct klistmount kls __free(klistmount_free) = {};
const size_t maxcount = 1000000;
- struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
struct mnt_id_req kreq;
- u64 last_mnt_id;
ssize_t ret;
if (flags & ~LISTMOUNT_REVERSE)
@@ -6045,22 +6028,12 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
if (ret)
return ret;
- last_mnt_id = kreq.param;
- /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
- if (last_mnt_id != 0 && last_mnt_id <= MNT_UNIQUE_ID_OFFSET)
- return -EINVAL;
-
- kmnt_ids = kvmalloc_array(nr_mnt_ids, sizeof(*kmnt_ids),
- GFP_KERNEL_ACCOUNT);
- if (!kmnt_ids)
- return -ENOMEM;
-
- ns = grab_requested_mnt_ns(&kreq);
- if (!ns)
- return -ENOENT;
+ ret = prepare_klistmount(&kls, &kreq, nr_mnt_ids);
+ if (ret)
+ return ret;
- if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
- !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
+ if (kreq.mnt_ns_id && (kls.ns != current->nsproxy->mnt_ns) &&
+ !ns_capable_noaudit(kls.ns->user_ns, CAP_SYS_ADMIN))
return -ENOENT;
/*
@@ -6068,39 +6041,43 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
* listmount() doesn't care about any mount properties.
*/
scoped_guard(rwsem_read, &namespace_sem)
- ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids,
- nr_mnt_ids, (flags & LISTMOUNT_REVERSE));
+ ret = do_listmount(&kls, (flags & LISTMOUNT_REVERSE));
if (ret <= 0)
return ret;
- if (copy_to_user(mnt_ids, kmnt_ids, ret * sizeof(*mnt_ids)))
+ if (copy_to_user(mnt_ids, kls.kmnt_ids, ret * sizeof(*mnt_ids)))
return -EFAULT;
return ret;
}
+struct mnt_namespace init_mnt_ns = {
+ .ns.inum = ns_init_inum(&init_mnt_ns),
+ .ns.ops = &mntns_operations,
+ .user_ns = &init_user_ns,
+ .ns.__ns_ref = REFCOUNT_INIT(1),
+ .ns.ns_type = ns_common_type(&init_mnt_ns),
+ .passive = REFCOUNT_INIT(1),
+ .mounts = RB_ROOT,
+ .poll = __WAIT_QUEUE_HEAD_INITIALIZER(init_mnt_ns.poll),
+};
+
static void __init init_mount_tree(void)
{
struct vfsmount *mnt;
struct mount *m;
- struct mnt_namespace *ns;
struct path root;
- mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
+ mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", initramfs_options);
if (IS_ERR(mnt))
panic("Can't create rootfs");
- ns = alloc_mnt_ns(&init_user_ns, true);
- if (IS_ERR(ns))
- panic("Can't allocate initial namespace");
- ns->seq = atomic64_inc_return(&mnt_ns_seq);
- ns->ns.inum = PROC_MNT_INIT_INO;
m = real_mount(mnt);
- ns->root = m;
- ns->nr_mounts = 1;
- mnt_add_to_ns(ns, m);
- init_task.nsproxy->mnt_ns = ns;
- get_mnt_ns(ns);
+ init_mnt_ns.root = m;
+ init_mnt_ns.nr_mounts = 1;
+ mnt_add_to_ns(&init_mnt_ns, m);
+ init_task.nsproxy->mnt_ns = &init_mnt_ns;
+ get_mnt_ns(&init_mnt_ns);
root.mnt = mnt;
root.dentry = mnt->mnt_root;
@@ -6108,7 +6085,7 @@ static void __init init_mount_tree(void)
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
- mnt_ns_tree_add(ns);
+ ns_tree_add(&init_mnt_ns);
}
void __init mnt_init(void)
@@ -6148,7 +6125,7 @@ void __init mnt_init(void)
void put_mnt_ns(struct mnt_namespace *ns)
{
- if (!refcount_dec_and_test(&ns->ns.count))
+ if (!ns_ref_put(ns))
return;
namespace_lock();
emptied_ns = ns;
@@ -6397,7 +6374,6 @@ static struct user_namespace *mntns_owner(struct ns_common *ns)
const struct proc_ns_operations mntns_operations = {
.name = "mnt",
- .type = CLONE_NEWNS,
.get = mntns_get,
.put = mntns_put,
.install = mntns_install,