diff options
Diffstat (limited to 'mm/shmem.c')
| -rw-r--r-- | mm/shmem.c | 193 |
1 files changed, 108 insertions, 85 deletions
diff --git a/mm/shmem.c b/mm/shmem.c index b9081b817d28..d578d8e765d7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -66,7 +66,7 @@ static struct vfsmount *shm_mnt __ro_after_init; #include <linux/falloc.h> #include <linux/splice.h> #include <linux/security.h> -#include <linux/swapops.h> +#include <linux/leafops.h> #include <linux/mempolicy.h> #include <linux/namei.h> #include <linux/ctype.h> @@ -131,8 +131,7 @@ struct shmem_options { #define SHMEM_SEEN_INODES 2 #define SHMEM_SEEN_HUGE 4 #define SHMEM_SEEN_INUMS 8 -#define SHMEM_SEEN_NOSWAP 16 -#define SHMEM_SEEN_QUOTA 32 +#define SHMEM_SEEN_QUOTA 16 }; #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -570,8 +569,37 @@ static int shmem_confirm_swap(struct address_space *mapping, pgoff_t index, #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* ifdef here to avoid bloating shmem.o when not necessary */ -static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER; -static int tmpfs_huge __read_mostly = SHMEM_HUGE_NEVER; +#if defined(CONFIG_TRANSPARENT_HUGEPAGE_SHMEM_HUGE_NEVER) +#define SHMEM_HUGE_DEFAULT SHMEM_HUGE_NEVER +#elif defined(CONFIG_TRANSPARENT_HUGEPAGE_SHMEM_HUGE_ALWAYS) +#define SHMEM_HUGE_DEFAULT SHMEM_HUGE_ALWAYS +#elif defined(CONFIG_TRANSPARENT_HUGEPAGE_SHMEM_HUGE_WITHIN_SIZE) +#define SHMEM_HUGE_DEFAULT SHMEM_HUGE_WITHIN_SIZE +#elif defined(CONFIG_TRANSPARENT_HUGEPAGE_SHMEM_HUGE_ADVISE) +#define SHMEM_HUGE_DEFAULT SHMEM_HUGE_ADVISE +#else +#define SHMEM_HUGE_DEFAULT SHMEM_HUGE_NEVER +#endif + +static int shmem_huge __read_mostly = SHMEM_HUGE_DEFAULT; + +#undef SHMEM_HUGE_DEFAULT + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE_TMPFS_HUGE_NEVER) +#define TMPFS_HUGE_DEFAULT SHMEM_HUGE_NEVER +#elif defined(CONFIG_TRANSPARENT_HUGEPAGE_TMPFS_HUGE_ALWAYS) +#define TMPFS_HUGE_DEFAULT SHMEM_HUGE_ALWAYS +#elif defined(CONFIG_TRANSPARENT_HUGEPAGE_TMPFS_HUGE_WITHIN_SIZE) +#define TMPFS_HUGE_DEFAULT SHMEM_HUGE_WITHIN_SIZE +#elif defined(CONFIG_TRANSPARENT_HUGEPAGE_TMPFS_HUGE_ADVISE) +#define TMPFS_HUGE_DEFAULT SHMEM_HUGE_ADVISE +#else +#define TMPFS_HUGE_DEFAULT SHMEM_HUGE_NEVER +#endif + +static int tmpfs_huge __read_mostly = TMPFS_HUGE_DEFAULT; + +#undef TMPFS_HUGE_DEFAULT static unsigned int shmem_get_orders_within_size(struct inode *inode, unsigned long within_size_orders, pgoff_t index, @@ -616,34 +644,23 @@ static unsigned int shmem_huge_global_enabled(struct inode *inode, pgoff_t index * the mTHP interface, so we still use PMD-sized huge order to * check whether global control is enabled. * - * For tmpfs mmap()'s huge order, we still use PMD-sized order to - * allocate huge pages due to lack of a write size hint. - * * For tmpfs with 'huge=always' or 'huge=within_size' mount option, * we will always try PMD-sized order first. If that failed, it will * fall back to small large folios. */ switch (SHMEM_SB(inode->i_sb)->huge) { case SHMEM_HUGE_ALWAYS: - if (vma) - return maybe_pmd_order; - return THP_ORDERS_ALL_FILE_DEFAULT; case SHMEM_HUGE_WITHIN_SIZE: - if (vma) - within_size_orders = maybe_pmd_order; - else - within_size_orders = THP_ORDERS_ALL_FILE_DEFAULT; - - within_size_orders = shmem_get_orders_within_size(inode, within_size_orders, - index, write_end); + within_size_orders = shmem_get_orders_within_size(inode, + THP_ORDERS_ALL_FILE_DEFAULT, index, write_end); if (within_size_orders > 0) return within_size_orders; fallthrough; case SHMEM_HUGE_ADVISE: if (vm_flags & VM_HUGEPAGE) - return maybe_pmd_order; + return THP_ORDERS_ALL_FILE_DEFAULT; fallthrough; default: return 0; @@ -853,9 +870,9 @@ static unsigned int shmem_huge_global_enabled(struct inode *inode, pgoff_t index static void shmem_update_stats(struct folio *folio, int nr_pages) { if (folio_test_pmd_mappable(folio)) - __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr_pages); - __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr_pages); - __lruvec_stat_mod_folio(folio, NR_SHMEM, nr_pages); + lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr_pages); + lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr_pages); + lruvec_stat_mod_folio(folio, NR_SHMEM, nr_pages); } /* @@ -1076,7 +1093,7 @@ static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index) * Remove range of pages and swap entries from page cache, and free them. * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate. */ -static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, +static void shmem_undo_range(struct inode *inode, loff_t lstart, uoff_t lend, bool unfalloc) { struct address_space *mapping = inode->i_mapping; @@ -1133,7 +1150,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT); if (folio) { - same_folio = lend < folio_pos(folio) + folio_size(folio); + same_folio = lend < folio_next_pos(folio); folio_mark_dirty(folio); if (!truncate_inode_partial_folio(folio, lstart, lend)) { start = folio_next_index(folio); @@ -1227,7 +1244,7 @@ whole_folios: shmem_recalc_inode(inode, 0, -nr_swaps_freed); } -void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) +void shmem_truncate_range(struct inode *inode, loff_t lstart, uoff_t lend) { shmem_undo_range(inode, lstart, lend, false); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); @@ -1617,7 +1634,7 @@ try_split: folio_mark_uptodate(folio); } - if (!folio_alloc_swap(folio, __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN)) { + if (!folio_alloc_swap(folio)) { bool first_swapped = shmem_recalc_inode(inode, 0, nr_pages); int error; @@ -1882,6 +1899,7 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf, struct shmem_inode_info *info = SHMEM_I(inode); unsigned long suitable_orders = 0; struct folio *folio = NULL; + pgoff_t aligned_index; long pages; int error, order; @@ -1895,10 +1913,12 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf, order = highest_order(suitable_orders); while (suitable_orders) { pages = 1UL << order; - index = round_down(index, pages); - folio = shmem_alloc_folio(gfp, order, info, index); - if (folio) + aligned_index = round_down(index, pages); + folio = shmem_alloc_folio(gfp, order, info, aligned_index); + if (folio) { + index = aligned_index; goto allocated; + } if (pages == HPAGE_PMD_NR) count_vm_event(THP_FILE_FALLBACK); @@ -2254,7 +2274,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct mm_struct *fault_mm = vma ? vma->vm_mm : NULL; struct shmem_inode_info *info = SHMEM_I(inode); - swp_entry_t swap, index_entry; + swp_entry_t swap; + softleaf_t index_entry; struct swap_info_struct *si; struct folio *folio = NULL; bool skip_swapcache = false; @@ -2266,7 +2287,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, swap = index_entry; *foliop = NULL; - if (is_poisoned_swp_entry(index_entry)) + if (softleaf_is_poison_marker(index_entry)) return -EIO; si = get_swap_device(index_entry); @@ -2756,8 +2777,7 @@ unsigned long shmem_get_unmapped_area(struct file *file, if (len > TASK_SIZE) return -ENOMEM; - addr = mm_get_unmapped_area(current->mm, file, uaddr, len, pgoff, - flags); + addr = mm_get_unmapped_area(file, uaddr, len, pgoff, flags); if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) return addr; @@ -2835,8 +2855,7 @@ unsigned long shmem_get_unmapped_area(struct file *file, if (inflated_len < len) return addr; - inflated_addr = mm_get_unmapped_area(current->mm, NULL, uaddr, - inflated_len, 0, flags); + inflated_addr = mm_get_unmapped_area(NULL, uaddr, inflated_len, 0, flags); if (IS_ERR_VALUE(inflated_addr)) return addr; if (inflated_addr & ~PAGE_MASK) @@ -2924,16 +2943,17 @@ out_nomem: return retval; } -static int shmem_mmap(struct file *file, struct vm_area_struct *vma) +static int shmem_mmap_prepare(struct vm_area_desc *desc) { + struct file *file = desc->file; struct inode *inode = file_inode(file); file_accessed(file); /* This is anonymous shared memory if it is unlinked at the time of mmap */ if (inode->i_nlink) - vma->vm_ops = &shmem_vm_ops; + desc->vm_ops = &shmem_vm_ops; else - vma->vm_ops = &shmem_anon_vm_ops; + desc->vm_ops = &shmem_anon_vm_ops; return 0; } @@ -3858,12 +3878,7 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir, inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); inode_inc_iversion(dir); - if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) - d_add(dentry, inode); - else - d_instantiate(dentry, inode); - - dget(dentry); /* Extra count - pin the dentry in core */ + d_make_persistent(dentry, inode); return error; out_iput: @@ -3924,7 +3939,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); - int ret = 0; + int ret; /* * No ordinary (disk based) filesystem counts links as inodes; @@ -3936,29 +3951,19 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink) { ret = shmem_reserve_inode(inode->i_sb, NULL); if (ret) - goto out; + return ret; } ret = simple_offset_add(shmem_get_offset_ctx(dir), dentry); if (ret) { if (inode->i_nlink) shmem_free_inode(inode->i_sb, 0); - goto out; + return ret; } dir->i_size += BOGO_DIRENT_SIZE; - inode_set_mtime_to_ts(dir, - inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); inode_inc_iversion(dir); - inc_nlink(inode); - ihold(inode); /* New dentry reference */ - dget(dentry); /* Extra pinning count for the created dentry */ - if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) - d_add(dentry, inode); - else - d_instantiate(dentry, inode); -out: - return ret; + return simple_link(old_dentry, dir, dentry); } static int shmem_unlink(struct inode *dir, struct dentry *dentry) @@ -3971,11 +3976,8 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry) simple_offset_remove(shmem_get_offset_ctx(dir), dentry); dir->i_size -= BOGO_DIRENT_SIZE; - inode_set_mtime_to_ts(dir, - inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); inode_inc_iversion(dir); - drop_nlink(inode); - dput(dentry); /* Undo the count from "create" - does all the work */ + simple_unlink(dir, dentry); /* * For now, VFS can't deal with case-insensitive negative dentries, so @@ -4130,11 +4132,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, dir->i_size += BOGO_DIRENT_SIZE; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); inode_inc_iversion(dir); - if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) - d_add(dentry, inode); - else - d_instantiate(dentry, inode); - dget(dentry); + d_make_persistent(dentry, inode); return 0; out_remove_offset: @@ -4677,7 +4675,6 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) "Turning off swap in unprivileged tmpfs mounts unsupported"); } ctx->noswap = true; - ctx->seen |= SHMEM_SEEN_NOSWAP; break; case Opt_quota: if (fc->user_ns != &init_user_ns) @@ -4827,14 +4824,15 @@ static int shmem_reconfigure(struct fs_context *fc) err = "Current inum too high to switch to 32-bit inums"; goto out; } - if ((ctx->seen & SHMEM_SEEN_NOSWAP) && ctx->noswap && !sbinfo->noswap) { + + /* + * "noswap" doesn't use fsparam_flag_no, i.e. there's no "swap" + * counterpart for (re-)enabling swap. + */ + if (ctx->noswap && !sbinfo->noswap) { err = "Cannot disable swap on remount"; goto out; } - if (!(ctx->seen & SHMEM_SEEN_NOSWAP) && !ctx->noswap && sbinfo->noswap) { - err = "Cannot enable swap on remount if it was disabled on first mount"; - goto out; - } if (ctx->seen & SHMEM_SEEN_QUOTA && !sb_any_quota_loaded(fc->root->d_sb)) { @@ -5203,7 +5201,7 @@ static const struct address_space_operations shmem_aops = { }; static const struct file_operations shmem_file_operations = { - .mmap = shmem_mmap, + .mmap_prepare = shmem_mmap_prepare, .open = shmem_file_open, .get_unmapped_area = shmem_get_unmapped_area, #ifdef CONFIG_TMPFS @@ -5334,7 +5332,7 @@ static struct file_system_type shmem_fs_type = { #ifdef CONFIG_TMPFS .parameters = shmem_fs_parameters, #endif - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP | FS_MGTIME, }; @@ -5772,11 +5770,11 @@ unsigned long shmem_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags); + return mm_get_unmapped_area(file, addr, len, pgoff, flags); } #endif -void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) +void shmem_truncate_range(struct inode *inode, loff_t lstart, uoff_t lend) { truncate_inode_pages_range(inode->i_mapping, lstart, lend); } @@ -5878,14 +5876,9 @@ struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name, } EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt); -/** - * shmem_zero_setup - setup a shared anonymous mapping - * @vma: the vma to be mmapped is prepared by do_mmap - */ -int shmem_zero_setup(struct vm_area_struct *vma) +static struct file *__shmem_zero_setup(unsigned long start, unsigned long end, vm_flags_t vm_flags) { - struct file *file; - loff_t size = vma->vm_end - vma->vm_start; + loff_t size = end - start; /* * Cloning a new file under mmap_lock leads to a lock ordering conflict @@ -5893,7 +5886,18 @@ int shmem_zero_setup(struct vm_area_struct *vma) * accessible to the user through its mapping, use S_PRIVATE flag to * bypass file security, in the same way as shmem_kernel_file_setup(). */ - file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags); + return shmem_kernel_file_setup("dev/zero", size, vm_flags); +} + +/** + * shmem_zero_setup - setup a shared anonymous mapping + * @vma: the vma to be mmapped is prepared by do_mmap + * Returns: 0 on success, or error + */ +int shmem_zero_setup(struct vm_area_struct *vma) +{ + struct file *file = __shmem_zero_setup(vma->vm_start, vma->vm_end, vma->vm_flags); + if (IS_ERR(file)) return PTR_ERR(file); @@ -5906,6 +5910,25 @@ int shmem_zero_setup(struct vm_area_struct *vma) } /** + * shmem_zero_setup_desc - same as shmem_zero_setup, but determined by VMA + * descriptor for convenience. + * @desc: Describes VMA + * Returns: 0 on success, or error + */ +int shmem_zero_setup_desc(struct vm_area_desc *desc) +{ + struct file *file = __shmem_zero_setup(desc->start, desc->end, desc->vm_flags); + + if (IS_ERR(file)) + return PTR_ERR(file); + + desc->vm_file = file; + desc->vm_ops = &shmem_anon_vm_ops; + + return 0; +} + +/** * shmem_read_folio_gfp - read into page cache, using specified page allocation flags. * @mapping: the folio's address_space * @index: the folio index |
