From 8b17e540969a0983abe96ffc352397890ab67bf1 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sun, 9 Feb 2025 19:55:20 +0100 Subject: vfs: add initial support for CONFIG_DEBUG_VFS Small collection of macros taken from mmdebug.h Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20250209185523.745956-2-mjguzik@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index be3ad155ec9f..d5e3fb14ad8c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2,6 +2,7 @@ #ifndef _LINUX_FS_H #define _LINUX_FS_H +#include #include #include #include -- cgit v1.2.3 From 3eb7e95104141609d4aed15affadedb81c408f03 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sun, 9 Feb 2025 19:55:22 +0100 Subject: vfs: use the new debug macros in inode_set_cached_link() Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20250209185523.745956-4-mjguzik@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index d5e3fb14ad8c..e71d58c7f59c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -792,6 +792,8 @@ struct inode { static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen) { + VFS_WARN_ON_INODE(strlen(link) != linklen, inode); + VFS_WARN_ON_INODE(inode->i_opflags & IOP_CACHED_LINK, inode); inode->i_link = link; inode->i_linklen = linklen; inode->i_opflags |= IOP_CACHED_LINK; -- cgit v1.2.3 From 29d80d506b18384f64a54b01fae78184e2d327f3 Mon Sep 17 00:00:00 2001 From: Yuichiro Tsuji Date: Tue, 21 Jan 2025 16:08:22 +0900 Subject: open: Fix return type of several functions from long to int Fix the return type of several functions from long to int to match its actu al behavior. These functions only return int values. This change improves type consistency across the filesystem code and aligns the function signatu re with its existing implementation and usage. Reviewed-by: Jan Kara Signed-off-by: Yuichiro Tsuji Link: https://lore.kernel.org/r/20250121070844.4413-2-yuichtsu@amazon.com Signed-off-by: Christian Brauner --- fs/internal.h | 4 ++-- fs/open.c | 20 ++++++++++---------- include/linux/fs.h | 6 +++--- include/linux/syscalls.h | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/internal.h b/fs/internal.h index e7f02ae1e098..84607e7b05dc 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -187,8 +187,8 @@ extern struct open_how build_open_how(int flags, umode_t mode); extern int build_open_flags(const struct open_how *how, struct open_flags *op); struct file *file_close_fd_locked(struct files_struct *files, unsigned fd); -long do_ftruncate(struct file *file, loff_t length, int small); -long do_sys_ftruncate(unsigned int fd, loff_t length, int small); +int do_ftruncate(struct file *file, loff_t length, int small); +int do_sys_ftruncate(unsigned int fd, loff_t length, int small); int chmod_common(const struct path *path, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); diff --git a/fs/open.c b/fs/open.c index 932e5a6de63b..634fdca6501a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -67,11 +67,11 @@ int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry, return ret; } -long vfs_truncate(const struct path *path, loff_t length) +int vfs_truncate(const struct path *path, loff_t length) { struct mnt_idmap *idmap; struct inode *inode; - long error; + int error; inode = path->dentry->d_inode; @@ -123,7 +123,7 @@ mnt_drop_write_and_out: } EXPORT_SYMBOL_GPL(vfs_truncate); -long do_sys_truncate(const char __user *pathname, loff_t length) +int do_sys_truncate(const char __user *pathname, loff_t length) { unsigned int lookup_flags = LOOKUP_FOLLOW; struct path path; @@ -157,7 +157,7 @@ COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length } #endif -long do_ftruncate(struct file *file, loff_t length, int small) +int do_ftruncate(struct file *file, loff_t length, int small) { struct inode *inode; struct dentry *dentry; @@ -196,7 +196,7 @@ long do_ftruncate(struct file *file, loff_t length, int small) return error; } -long do_sys_ftruncate(unsigned int fd, loff_t length, int small) +int do_sys_ftruncate(unsigned int fd, loff_t length, int small) { if (length < 0) return -EINVAL; @@ -251,7 +251,7 @@ COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd, int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); - long ret; + int ret; loff_t sum; if (offset < 0 || len <= 0) @@ -460,7 +460,7 @@ static const struct cred *access_override_creds(void) return override_creds(override_cred); } -static long do_faccessat(int dfd, const char __user *filename, int mode, int flags) +static int do_faccessat(int dfd, const char __user *filename, int mode, int flags) { struct path path; struct inode *inode; @@ -1408,8 +1408,8 @@ struct file *file_open_root(const struct path *root, } EXPORT_SYMBOL(file_open_root); -static long do_sys_openat2(int dfd, const char __user *filename, - struct open_how *how) +static int do_sys_openat2(int dfd, const char __user *filename, + struct open_how *how) { struct open_flags op; int fd = build_open_flags(how, &op); @@ -1436,7 +1436,7 @@ static long do_sys_openat2(int dfd, const char __user *filename, return fd; } -long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) +int do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) { struct open_how how = build_open_how(flags, mode); return do_sys_openat2(dfd, filename, &how); diff --git a/include/linux/fs.h b/include/linux/fs.h index e71d58c7f59c..d1f1673956e2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2785,13 +2785,13 @@ static inline bool is_idmapped_mnt(const struct vfsmount *mnt) return mnt_idmap(mnt) != &nop_mnt_idmap; } -extern long vfs_truncate(const struct path *, loff_t); +int vfs_truncate(const struct path *, loff_t); int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len); -extern long do_sys_open(int dfd, const char __user *filename, int flags, - umode_t mode); +int do_sys_open(int dfd, const char __user *filename, int flags, + umode_t mode); extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(const struct path *, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c6333204d451..bae4490c1dda 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1266,14 +1266,14 @@ static inline long ksys_lchown(const char __user *filename, uid_t user, AT_SYMLINK_NOFOLLOW); } -extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small); +int do_sys_ftruncate(unsigned int fd, loff_t length, int small); static inline long ksys_ftruncate(unsigned int fd, loff_t length) { return do_sys_ftruncate(fd, length, 1); } -extern long do_sys_truncate(const char __user *pathname, loff_t length); +int do_sys_truncate(const char __user *pathname, loff_t length); static inline long ksys_truncate(const char __user *pathname, loff_t length) { -- cgit v1.2.3 From f326565c44419380d18290edee5f78921418f7a5 Mon Sep 17 00:00:00 2001 From: Yuichiro Tsuji Date: Tue, 21 Jan 2025 16:08:23 +0900 Subject: ioctl: Fix return type of several functions from long to int Fix the return type of several functions from long to int to match its actu al behavior. These functions only return int values. This change improves type consistency across the filesystem code and aligns the function signatu re with its existing implementation and usage. Reviewed-by: Jan Kara Signed-off-by: Yuichiro Tsuji Link: https://lore.kernel.org/r/20250121070844.4413-3-yuichtsu@amazon.com Signed-off-by: Christian Brauner --- fs/ioctl.c | 10 +++++----- include/linux/fs.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/ioctl.c b/fs/ioctl.c index 638a36be31c1..c91fd2b46a77 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -41,7 +41,7 @@ * * Returns 0 on success, -errno on error. */ -long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +int vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int error = -ENOTTY; @@ -228,8 +228,8 @@ static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap) return error; } -static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, - u64 off, u64 olen, u64 destoff) +static int ioctl_file_clone(struct file *dst_file, unsigned long srcfd, + u64 off, u64 olen, u64 destoff) { CLASS(fd, src_file)(srcfd); loff_t cloned; @@ -248,8 +248,8 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, return ret; } -static long ioctl_file_clone_range(struct file *file, - struct file_clone_range __user *argp) +static int ioctl_file_clone_range(struct file *file, + struct file_clone_range __user *argp) { struct file_clone_range args; diff --git a/include/linux/fs.h b/include/linux/fs.h index d1f1673956e2..c1763b022d06 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2030,7 +2030,7 @@ int vfs_fchown(struct file *file, uid_t user, gid_t group); int vfs_fchmod(struct file *file, umode_t mode); int vfs_utimes(const struct path *path, struct timespec64 *times); -extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, -- cgit v1.2.3 From 1bb772565f327291b0a463b125c7646dc45ae8b4 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Thu, 6 Feb 2025 01:01:05 +0100 Subject: vfs: inline getname() It is merely a trivial wrapper around getname_flags which adds a zeroed argument, no point paying for an extra call. Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20250206000105.432528-1-mjguzik@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/namei.c | 5 ----- include/linux/fs.h | 5 ++++- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/namei.c b/fs/namei.c index 21630a0f8e30..207898ee9fd5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -218,11 +218,6 @@ struct filename *getname_uflags(const char __user *filename, int uflags) return getname_flags(filename, flags); } -struct filename *getname(const char __user * filename) -{ - return getname_flags(filename, 0); -} - struct filename *__getname_maybe_null(const char __user *pathname) { struct filename *name; diff --git a/include/linux/fs.h b/include/linux/fs.h index c1763b022d06..3e07e4a44de6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2842,7 +2842,10 @@ extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname_flags(const char __user *, int); extern struct filename *getname_uflags(const char __user *, int); -extern struct filename *getname(const char __user *); +static inline struct filename *getname(const char __user *name) +{ + return getname_flags(name, 0); +} extern struct filename *getname_kernel(const char *); extern struct filename *__getname_maybe_null(const char __user *); static inline struct filename *getname_maybe_null(const char __user *name, int flags) -- cgit v1.2.3 From 1479be62582dbd2078390ef609f8f5ef351c15e8 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 12 Feb 2025 19:04:59 +0100 Subject: vfs: inline new_inode_pseudo() and de-staticize alloc_inode() The former is a no-op wrapper with the same argument. I left it in place to not lose the information who needs it -- one day "pseudo" inodes may start differing from what alloc_inode() returns. In the meantime no point taking a detour. Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20250212180459.1022983-1-mjguzik@gmail.com Signed-off-by: Christian Brauner --- fs/inode.c | 29 ++++++++++++----------------- include/linux/fs.h | 6 +++++- 2 files changed, 17 insertions(+), 18 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/inode.c b/fs/inode.c index 875e66261f06..0cd230415097 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -327,7 +327,17 @@ static void i_callback(struct rcu_head *head) free_inode_nonrcu(inode); } -static struct inode *alloc_inode(struct super_block *sb) +/** + * alloc_inode - obtain an inode + * @sb: superblock + * + * Allocates a new inode for given superblock. + * Inode wont be chained in superblock s_inodes list + * This means : + * - fs can't be unmount + * - quotas, fsnotify, writeback can't work + */ +struct inode *alloc_inode(struct super_block *sb) { const struct super_operations *ops = sb->s_op; struct inode *inode; @@ -1159,21 +1169,6 @@ unsigned int get_next_ino(void) } EXPORT_SYMBOL(get_next_ino); -/** - * new_inode_pseudo - obtain an inode - * @sb: superblock - * - * Allocates a new inode for given superblock. - * Inode wont be chained in superblock s_inodes list - * This means : - * - fs can't be unmount - * - quotas, fsnotify, writeback can't work - */ -struct inode *new_inode_pseudo(struct super_block *sb) -{ - return alloc_inode(sb); -} - /** * new_inode - obtain an inode * @sb: superblock @@ -1190,7 +1185,7 @@ struct inode *new_inode(struct super_block *sb) { struct inode *inode; - inode = new_inode_pseudo(sb); + inode = alloc_inode(sb); if (inode) inode_sb_list_add(inode); return inode; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3e07e4a44de6..70c985f3d787 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3285,7 +3285,11 @@ static inline void __iget(struct inode *inode) extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void __destroy_inode(struct inode *); -extern struct inode *new_inode_pseudo(struct super_block *sb); +struct inode *alloc_inode(struct super_block *sb); +static inline struct inode *new_inode_pseudo(struct super_block *sb) +{ + return alloc_inode(sb); +} extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); -- cgit v1.2.3 From e249056c91a2f14ee40de2bf24cf72d8e68101f5 Mon Sep 17 00:00:00 2001 From: Pan Deng Date: Fri, 28 Feb 2025 10:00:59 +0800 Subject: fs: place f_ref to 3rd cache line in struct file to resolve false sharing When running syscall pread in a high core count system, f_ref contends with the reading of f_mode, f_op, f_mapping, f_inode, f_flags in the same cache line. This change places f_ref to the 3rd cache line where fields are not updated as frequently as the 1st cache line, and the contention is grealy reduced according to tests. In addition, the size of file object is kept in 3 cache lines. This change has been tested with rocksdb benchmark readwhilewriting case in 1 socket 64 physical core 128 logical core baremetal machine, with build config CONFIG_RANDSTRUCT_NONE=y Command: ./db_bench --benchmarks="readwhilewriting" --threads $cnt --duration 60 The throughput(ops/s) is improved up to ~21%. ===== thread baseline compare 16 100% +1.3% 32 100% +2.2% 64 100% +7.2% 128 100% +20.9% It was also tested with UnixBench: syscall, fsbuffer, fstime, fsdisk cases that has been used for file struct layout tuning, no regression was observed. Signed-off-by: Pan Deng Link: https://lore.kernel.org/r/20250228020059.3023375-1-pan.deng@intel.com Tested-by: Lipeng Zhu Reviewed-by: Tianyou Li Reviewed-by: Tim Chen Signed-off-by: Christian Brauner --- include/linux/fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 70c985f3d787..9ab789cd1531 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1058,7 +1058,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) /** * struct file - Represents a file - * @f_ref: reference count * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context. * @f_mode: FMODE_* flags often used in hotpaths * @f_op: file operations @@ -1068,12 +1067,12 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_flags: file flags * @f_iocb_flags: iocb flags * @f_cred: stashed credentials of creator/opener + * @f_owner: file owner * @f_path: path of the file * @f_pos_lock: lock protecting file position * @f_pipe: specific to pipes * @f_pos: file position * @f_security: LSM security context of this file - * @f_owner: file owner * @f_wb_err: writeback error * @f_sb_err: per sb writeback errors * @f_ep: link of all epoll hooks for this file @@ -1081,9 +1080,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_llist: work queue entrypoint * @f_ra: file's readahead state * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.) + * @f_ref: reference count */ struct file { - file_ref_t f_ref; spinlock_t f_lock; fmode_t f_mode; const struct file_operations *f_op; @@ -1093,6 +1092,7 @@ struct file { unsigned int f_flags; unsigned int f_iocb_flags; const struct cred *f_cred; + struct fown_struct *f_owner; /* --- cacheline 1 boundary (64 bytes) --- */ struct path f_path; union { @@ -1106,7 +1106,6 @@ struct file { void *f_security; #endif /* --- cacheline 2 boundary (128 bytes) --- */ - struct fown_struct *f_owner; errseq_t f_wb_err; errseq_t f_sb_err; #ifdef CONFIG_EPOLL @@ -1118,6 +1117,7 @@ struct file { struct file_ra_state f_ra; freeptr_t f_freeptr; }; + file_ref_t f_ref; /* --- cacheline 3 boundary (192 bytes) --- */ } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ -- cgit v1.2.3 From 611851010c74046c0bc2b0461b72a6fae81c16d0 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Thu, 13 Mar 2025 15:27:44 +0100 Subject: fs: dedup handling of struct filename init and refcounts bumps No functional changes. Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20250313142744.1323281-1-mjguzik@gmail.com Signed-off-by: Christian Brauner --- fs/namei.c | 17 +++++++++-------- include/linux/fs.h | 6 ++++++ kernel/auditsc.c | 12 +++++------- 3 files changed, 20 insertions(+), 15 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/namei.c b/fs/namei.c index 3209d65b5fc0..345e20143b5d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -125,6 +125,13 @@ #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) +static inline void initname(struct filename *name) +{ + name->uptr = NULL; + name->aname = NULL; + atomic_set(&name->refcnt, 1); +} + struct filename * getname_flags(const char __user *filename, int flags) { @@ -203,10 +210,7 @@ getname_flags(const char __user *filename, int flags) return ERR_PTR(-ENAMETOOLONG); } } - - atomic_set(&result->refcnt, 1); - result->uptr = filename; - result->aname = NULL; + initname(result); audit_getname(result); return result; } @@ -264,11 +268,8 @@ struct filename *getname_kernel(const char * filename) return ERR_PTR(-ENAMETOOLONG); } memcpy((char *)result->name, filename, len); - result->uptr = NULL; - result->aname = NULL; - atomic_set(&result->refcnt, 1); + initname(result); audit_getname(result); - return result; } EXPORT_SYMBOL(getname_kernel); diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ab789cd1531..ae1fce54eb60 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2859,6 +2859,12 @@ static inline struct filename *getname_maybe_null(const char __user *name, int f } extern void putname(struct filename *name); +static inline struct filename *refname(struct filename *name) +{ + atomic_inc(&name->refcnt); + return name; +} + extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern int finish_no_open(struct file *file, struct dentry *dentry); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 9c853cde9abe..78fd876a5473 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2207,10 +2207,8 @@ __audit_reusename(const __user char *uptr) list_for_each_entry(n, &context->names_list, list) { if (!n->name) continue; - if (n->name->uptr == uptr) { - atomic_inc(&n->name->refcnt); - return n->name; - } + if (n->name->uptr == uptr) + return refname(n->name); } return NULL; } @@ -2237,7 +2235,7 @@ void __audit_getname(struct filename *name) n->name = name; n->name_len = AUDIT_NAME_FULL; name->aname = n; - atomic_inc(&name->refcnt); + refname(name); } static inline int audit_copy_fcaps(struct audit_names *name, @@ -2369,7 +2367,7 @@ out_alloc: return; if (name) { n->name = name; - atomic_inc(&name->refcnt); + refname(name); } out: @@ -2496,7 +2494,7 @@ void __audit_inode_child(struct inode *parent, if (found_parent) { found_child->name = found_parent->name; found_child->name_len = AUDIT_NAME_FULL; - atomic_inc(&found_child->name->refcnt); + refname(found_child->name); } } -- cgit v1.2.3