Diffstat (limited to 'mm/shmem.c')
-rw-r--r-- | mm/shmem.c | 141
1 file changed, 40 insertions(+), 101 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index e2c76a30802b..b9081b817d28 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -275,18 +275,18 @@ static const struct vm_operations_struct shmem_vm_ops;
 static const struct vm_operations_struct shmem_anon_vm_ops;
 static struct file_system_type shmem_fs_type;
 
-bool shmem_mapping(struct address_space *mapping)
+bool shmem_mapping(const struct address_space *mapping)
 {
 	return mapping->a_ops == &shmem_aops;
 }
 EXPORT_SYMBOL_GPL(shmem_mapping);
 
-bool vma_is_anon_shmem(struct vm_area_struct *vma)
+bool vma_is_anon_shmem(const struct vm_area_struct *vma)
 {
 	return vma->vm_ops == &shmem_anon_vm_ops;
 }
 
-bool vma_is_shmem(struct vm_area_struct *vma)
+bool vma_is_shmem(const struct vm_area_struct *vma)
 {
 	return vma_is_anon_shmem(vma) || vma->vm_ops == &shmem_vm_ops;
 }
@@ -573,42 +573,6 @@ static int shmem_confirm_swap(struct address_space *mapping, pgoff_t index,
 static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
 static int tmpfs_huge __read_mostly = SHMEM_HUGE_NEVER;
 
-/**
- * shmem_mapping_size_orders - Get allowable folio orders for the given file size.
- * @mapping: Target address_space.
- * @index: The page index.
- * @write_end: end of a write, could extend inode size.
- *
- * This returns huge orders for folios (when supported) based on the file size
- * which the mapping currently allows at the given index. The index is relevant
- * due to alignment considerations the mapping might have. The returned order
- * may be less than the size passed.
- *
- * Return: The orders.
- */
-static inline unsigned int
-shmem_mapping_size_orders(struct address_space *mapping, pgoff_t index, loff_t write_end)
-{
-	unsigned int order;
-	size_t size;
-
-	if (!mapping_large_folio_support(mapping) || !write_end)
-		return 0;
-
-	/* Calculate the write size based on the write_end */
-	size = write_end - (index << PAGE_SHIFT);
-	order = filemap_get_order(size);
-	if (!order)
-		return 0;
-
-	/* If we're not aligned, allocate a smaller folio */
-	if (index & ((1UL << order) - 1))
-		order = __ffs(index);
-
-	order = min_t(size_t, order, MAX_PAGECACHE_ORDER);
-	return order > 0 ? BIT(order + 1) - 1 : 0;
-}
-
 static unsigned int shmem_get_orders_within_size(struct inode *inode,
 				unsigned long within_size_orders, pgoff_t index,
 				loff_t write_end)
@@ -655,22 +619,21 @@ static unsigned int shmem_huge_global_enabled(struct inode *inode, pgoff_t index
 	 * For tmpfs mmap()'s huge order, we still use PMD-sized order to
 	 * allocate huge pages due to lack of a write size hint.
 	 *
-	 * Otherwise, tmpfs will allow getting a highest order hint based on
-	 * the size of write and fallocate paths, then will try each allowable
-	 * huge orders.
+	 * For tmpfs with 'huge=always' or 'huge=within_size' mount option,
+	 * we will always try PMD-sized order first. If that failed, it will
+	 * fall back to small large folios.
 	 */
 	switch (SHMEM_SB(inode->i_sb)->huge) {
 	case SHMEM_HUGE_ALWAYS:
 		if (vma)
 			return maybe_pmd_order;
 
-		return shmem_mapping_size_orders(inode->i_mapping, index, write_end);
+		return THP_ORDERS_ALL_FILE_DEFAULT;
 	case SHMEM_HUGE_WITHIN_SIZE:
 		if (vma)
 			within_size_orders = maybe_pmd_order;
 		else
-			within_size_orders = shmem_mapping_size_orders(inode->i_mapping,
-								       index, write_end);
+			within_size_orders = THP_ORDERS_ALL_FILE_DEFAULT;
 
 		within_size_orders = shmem_get_orders_within_size(inode, within_size_orders,
 								  index, write_end);
@@ -1006,15 +969,15 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
 						pgoff_t start, pgoff_t end)
 {
 	XA_STATE(xas, &mapping->i_pages, start);
-	struct page *page;
+	struct folio *folio;
 	unsigned long swapped = 0;
 	unsigned long max = end - 1;
 
 	rcu_read_lock();
-	xas_for_each(&xas, page, max) {
-		if (xas_retry(&xas, page))
+	xas_for_each(&xas, folio, max) {
+		if (xas_retry(&xas, folio))
 			continue;
-		if (xa_is_value(page))
+		if (xa_is_value(folio))
 			swapped += 1 << xas_get_order(&xas);
 		if (xas.xa_index == max)
 			break;
@@ -1698,13 +1661,13 @@ try_split:
 		}
 
 		/*
-		 * The delete_from_swap_cache() below could be left for
+		 * The swap_cache_del_folio() below could be left for
 		 * shrink_folio_list()'s folio_free_swap() to dispose of;
 		 * but I'm a little nervous about letting this folio out of
 		 * shmem_writeout() in a hybrid half-tmpfs-half-swap state
 		 * e.g. folio_mapping(folio) might give an unexpected answer.
 		 */
-		delete_from_swap_cache(folio);
+		swap_cache_del_folio(folio);
 		goto redirty;
 	}
 	if (nr_pages > 1)
@@ -1817,7 +1780,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
 	vm_flags_t vm_flags = vma ? vma->vm_flags : 0;
 	unsigned int global_orders;
 
-	if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags)))
+	if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force)))
 		return 0;
 
 	global_orders = shmem_huge_global_enabled(inode, index, write_end,
@@ -2082,7 +2045,7 @@ retry:
 	new->swap = entry;
 	memcg1_swapin(entry, nr_pages);
 
-	shadow = get_shadow_from_swap_cache(entry);
+	shadow = swap_cache_get_shadow(entry);
 	if (shadow)
 		workingset_refault(new, shadow);
 	folio_add_lru(new);
@@ -2120,13 +2083,11 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
 				struct shmem_inode_info *info, pgoff_t index,
 				struct vm_area_struct *vma)
 {
+	struct swap_cluster_info *ci;
 	struct folio *new, *old = *foliop;
 	swp_entry_t entry = old->swap;
-	struct address_space *swap_mapping = swap_address_space(entry);
-	pgoff_t swap_index = swap_cache_index(entry);
-	XA_STATE(xas, &swap_mapping->i_pages, swap_index);
 	int nr_pages = folio_nr_pages(old);
-	int error = 0, i;
+	int error = 0;
 
 	/*
 	 * We have arrived here because our zones are constrained, so don't
@@ -2155,38 +2116,15 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
 	new->swap = entry;
 	folio_set_swapcache(new);
 
-	/* Swap cache still stores N entries instead of a high-order entry */
-	xa_lock_irq(&swap_mapping->i_pages);
-	for (i = 0; i < nr_pages; i++) {
-		void *item = xas_load(&xas);
+	ci = swap_cluster_get_and_lock_irq(old);
+	__swap_cache_replace_folio(ci, old, new);
+	mem_cgroup_replace_folio(old, new);
+	shmem_update_stats(new, nr_pages);
+	shmem_update_stats(old, -nr_pages);
+	swap_cluster_unlock_irq(ci);
 
-		if (item != old) {
-			error = -ENOENT;
-			break;
-		}
-
-		xas_store(&xas, new);
-		xas_next(&xas);
-	}
-	if (!error) {
-		mem_cgroup_replace_folio(old, new);
-		shmem_update_stats(new, nr_pages);
-		shmem_update_stats(old, -nr_pages);
-	}
-	xa_unlock_irq(&swap_mapping->i_pages);
-
-	if (unlikely(error)) {
-		/*
-		 * Is this possible? I think not, now that our callers
-		 * check both the swapcache flag and folio->private
-		 * after getting the folio lock; but be defensive.
-		 * Reverse old to newpage for clear and free.
-		 */
-		old = new;
-	} else {
-		folio_add_lru(new);
-		*foliop = new;
-	}
+	folio_add_lru(new);
+	*foliop = new;
 
 	folio_clear_swapcache(old);
 	old->private = NULL;
@@ -2220,7 +2158,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
 	nr_pages = folio_nr_pages(folio);
 	folio_wait_writeback(folio);
 	if (!skip_swapcache)
-		delete_from_swap_cache(folio);
+		swap_cache_del_folio(folio);
 	/*
 	 * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
 	 * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
@@ -2235,7 +2173,7 @@ static int shmem_split_large_entry(struct inode *inode, pgoff_t index,
 {
 	struct address_space *mapping = inode->i_mapping;
 	XA_STATE_ORDER(xas, &mapping->i_pages, index, 0);
-	int split_order = 0, entry_order;
+	int split_order = 0;
 	int i;
 
 	/* Convert user data gfp flags to xarray node gfp flags */
@@ -2253,15 +2191,12 @@ static int shmem_split_large_entry(struct inode *inode, pgoff_t index,
 			goto unlock;
 		}
 
-		entry_order = xas_get_order(&xas);
-
-		if (!entry_order)
+		cur_order = xas_get_order(&xas);
+		if (!cur_order)
 			goto unlock;
 
 		/* Try to split large swap entry in pagecache */
-		cur_order = entry_order;
-		swap_index = round_down(index, 1 << entry_order);
-
+		swap_index = round_down(index, 1 << cur_order);
 		split_order = xas_try_split_min_order(cur_order);
 
 		while (cur_order > 0) {
@@ -2354,7 +2289,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	}
 
 	/* Look it up and read it in.. */
-	folio = swap_cache_get_folio(swap, NULL, 0);
+	folio = swap_cache_get_folio(swap);
 	if (!folio) {
 		if (data_race(si->flags & SWP_SYNCHRONOUS_IO)) {
 			/* Direct swapin skipping swap cache & readahead */
@@ -2379,6 +2314,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 			count_vm_event(PGMAJFAULT);
 			count_memcg_event_mm(fault_mm, PGMAJFAULT);
 		}
+	} else {
+		swap_update_readahead(folio, NULL, 0);
 	}
 
 	if (order > folio_order(folio)) {
@@ -2430,7 +2367,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 		goto failed;
 	}
 	folio_wait_writeback(folio);
-	nr_pages = folio_nr_pages(folio);
 
 	/*
 	 * Some architectures may have to restore extra metadata to the
@@ -2458,7 +2394,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 		folio->swap.val = 0;
 		swapcache_clear(si, swap, nr_pages);
 	} else {
-		delete_from_swap_cache(folio);
+		swap_cache_del_folio(folio);
 	}
 	folio_mark_dirty(folio);
 	swap_free_nr(swap, nr_pages);
@@ -5081,7 +5017,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 		sb->s_flags |= SB_NOUSER;
 	}
 	sb->s_export_op = &shmem_export_ops;
-	sb->s_flags |= SB_NOSEC | SB_I_VERSION;
+	sb->s_flags |= SB_NOSEC;
 
 #if IS_ENABLED(CONFIG_UNICODE)
 	if (!ctx->encoding && ctx->strict_encoding) {
@@ -5341,7 +5277,7 @@ static const struct super_operations shmem_ops = {
 	.get_dquots	= shmem_get_dquots,
 #endif
 	.evict_inode	= shmem_evict_inode,
-	.drop_inode	= generic_delete_inode,
+	.drop_inode	= inode_just_drop,
 	.put_super	= shmem_put_super,
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	.nr_cached_objects	= shmem_unused_huge_count,
@@ -5385,6 +5321,9 @@ int shmem_init_fs_context(struct fs_context *fc)
 
 	fc->fs_private = ctx;
 	fc->ops = &shmem_fs_context_ops;
+#ifdef CONFIG_TMPFS
+	fc->sb_flags |= SB_I_VERSION;
+#endif
 	return 0;
 }
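
The updated comment in shmem_huge_global_enabled() says that, for 'huge=always' and 'huge=within_size', tmpfs now starts from the PMD-sized order and falls back to smaller large folios. The standalone C sketch below only illustrates that walk over an order bitmask; the mask value, the order 9 PMD assumption and the try_alloc_order() helper are hypothetical stand-ins, not the kernel's THP_ORDERS_ALL_FILE_DEFAULT or its real allocation path.

#include <stdbool.h>
#include <stdio.h>

/*
 * Pretend allocator: a stand-in for the shmem folio allocation path.
 * Imagine memory is fragmented so that only order <= 2 succeeds.
 */
static bool try_alloc_order(unsigned int order)
{
	return order <= 2;
}

int main(void)
{
	/*
	 * Hypothetical order bitmask allowing orders 1..9 (bit 0, the
	 * single-page order, left out); not the kernel's definition.
	 */
	unsigned long orders = ((1UL << 10) - 1) & ~1UL;
	unsigned int order = 9;	/* PMD order with 4K pages: a 2MB folio */

	for (;;) {
		/* Try the highest remaining order the mask allows. */
		if ((orders & (1UL << order)) && try_alloc_order(order)) {
			printf("allocated an order-%u folio\n", order);
			return 0;
		}
		if (order == 0)
			break;
		order--;	/* fall back to the next smaller folio order */
	}
	printf("no large folio available, caller falls back to order 0\n");
	return 0;
}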