Diffstat (limited to 'mm/shmem.c')
-rw-r--r--   mm/shmem.c   141
1 file changed, 40 insertions(+), 101 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index e2c76a30802b..b9081b817d28 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -275,18 +275,18 @@ static const struct vm_operations_struct shmem_vm_ops;
static const struct vm_operations_struct shmem_anon_vm_ops;
static struct file_system_type shmem_fs_type;
-bool shmem_mapping(struct address_space *mapping)
+bool shmem_mapping(const struct address_space *mapping)
{
return mapping->a_ops == &shmem_aops;
}
EXPORT_SYMBOL_GPL(shmem_mapping);
-bool vma_is_anon_shmem(struct vm_area_struct *vma)
+bool vma_is_anon_shmem(const struct vm_area_struct *vma)
{
return vma->vm_ops == &shmem_anon_vm_ops;
}
-bool vma_is_shmem(struct vm_area_struct *vma)
+bool vma_is_shmem(const struct vm_area_struct *vma)
{
return vma_is_anon_shmem(vma) || vma->vm_ops == &shmem_vm_ops;
}
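
The hunk above only adds const qualifiers, so read-only callers can now classify a mapping or VMA without casting constness away. A minimal, hypothetical caller sketch, not part of the patch (my_walker_sees_tmpfs() and the includes are illustrative):

#include <linux/mm.h>
#include <linux/shmem_fs.h>

/* Hypothetical read-only walker: the const-qualified helpers can be
 * called on a const VMA without a cast. */
static bool my_walker_sees_tmpfs(const struct vm_area_struct *vma)
{
	/* File-backed tmpfs mapping, excluding anonymous shared memory. */
	return vma_is_shmem(vma) && !vma_is_anon_shmem(vma);
}
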
@@ -573,42 +573,6 @@ static int shmem_confirm_swap(struct address_space *mapping, pgoff_t index,
static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
static int tmpfs_huge __read_mostly = SHMEM_HUGE_NEVER;
-/**
- * shmem_mapping_size_orders - Get allowable folio orders for the given file size.
- * @mapping: Target address_space.
- * @index: The page index.
- * @write_end: end of a write, could extend inode size.
- *
- * This returns huge orders for folios (when supported) based on the file size
- * which the mapping currently allows at the given index. The index is relevant
- * due to alignment considerations the mapping might have. The returned order
- * may be less than the size passed.
- *
- * Return: The orders.
- */
-static inline unsigned int
-shmem_mapping_size_orders(struct address_space *mapping, pgoff_t index, loff_t write_end)
-{
- unsigned int order;
- size_t size;
-
- if (!mapping_large_folio_support(mapping) || !write_end)
- return 0;
-
- /* Calculate the write size based on the write_end */
- size = write_end - (index << PAGE_SHIFT);
- order = filemap_get_order(size);
- if (!order)
- return 0;
-
- /* If we're not aligned, allocate a smaller folio */
- if (index & ((1UL << order) - 1))
- order = __ffs(index);
-
- order = min_t(size_t, order, MAX_PAGECACHE_ORDER);
- return order > 0 ? BIT(order + 1) - 1 : 0;
-}
-
static unsigned int shmem_get_orders_within_size(struct inode *inode,
unsigned long within_size_orders, pgoff_t index,
loff_t write_end)
@@ -655,22 +619,21 @@ static unsigned int shmem_huge_global_enabled(struct inode *inode, pgoff_t index
* For tmpfs mmap()'s huge order, we still use PMD-sized order to
* allocate huge pages due to lack of a write size hint.
*
- * Otherwise, tmpfs will allow getting a highest order hint based on
- * the size of write and fallocate paths, then will try each allowable
- * huge orders.
+ * For tmpfs with the 'huge=always' or 'huge=within_size' mount option,
+ * we always try the PMD-sized order first. If that fails, we fall back
+ * to smaller large folio orders.
*/
switch (SHMEM_SB(inode->i_sb)->huge) {
case SHMEM_HUGE_ALWAYS:
if (vma)
return maybe_pmd_order;
- return shmem_mapping_size_orders(inode->i_mapping, index, write_end);
+ return THP_ORDERS_ALL_FILE_DEFAULT;
case SHMEM_HUGE_WITHIN_SIZE:
if (vma)
within_size_orders = maybe_pmd_order;
else
- within_size_orders = shmem_mapping_size_orders(inode->i_mapping,
- index, write_end);
+ within_size_orders = THP_ORDERS_ALL_FILE_DEFAULT;
within_size_orders = shmem_get_orders_within_size(inode, within_size_orders,
index, write_end);
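
For the non-mmap write and fallocate paths this drops the size-based order hint and simply offers the whole default file order mask, which the allocation side then walks from the PMD-sized order downwards. A sketch of that fallback walk, assuming the highest_order()/next_order() helpers from <linux/huge_mm.h>; it is illustrative only and stands in for shmem's real allocation path:

#include <linux/gfp.h>
#include <linux/huge_mm.h>

/* Walk an order bitmask such as THP_ORDERS_ALL_FILE_DEFAULT from the
 * highest (PMD-sized) order downwards, falling back to smaller large
 * folios until an allocation succeeds. */
static struct folio *try_orders_highest_first(unsigned long orders, gfp_t gfp)
{
	int order = highest_order(orders);

	while (orders) {
		struct folio *folio = folio_alloc(gfp, order);

		if (folio)
			return folio;
		order = next_order(&orders, order);
	}
	return NULL;	/* caller falls back to an order-0 page */
}
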
@@ -1006,15 +969,15 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
pgoff_t start, pgoff_t end)
{
XA_STATE(xas, &mapping->i_pages, start);
- struct page *page;
+ struct folio *folio;
unsigned long swapped = 0;
unsigned long max = end - 1;
rcu_read_lock();
- xas_for_each(&xas, page, max) {
- if (xas_retry(&xas, page))
+ xas_for_each(&xas, folio, max) {
+ if (xas_retry(&xas, folio))
continue;
- if (xa_is_value(page))
+ if (xa_is_value(folio))
swapped += 1 << xas_get_order(&xas);
if (xas.xa_index == max)
break;
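
The counting above relies on shmem keeping swapped-out ranges in the page cache xarray as value entries (one, possibly high-order, entry per swapped folio), so xa_is_value() separates them from resident folios and xas_get_order() reports how many pages a single entry covers. A small hypothetical probe of one index under those assumptions (swapped_pages_at() is not a real kernel helper):

#include <linux/pagemap.h>
#include <linux/xarray.h>

/* Hypothetical helper: how many pages are swapped out at @index? */
static unsigned long swapped_pages_at(struct address_space *mapping,
				      pgoff_t index)
{
	XA_STATE(xas, &mapping->i_pages, index);
	unsigned long nr = 0;
	void *entry;

	rcu_read_lock();
	entry = xas_load(&xas);
	/* Swap entries are value entries; a resident folio is not. */
	if (entry && !xas_retry(&xas, entry) && xa_is_value(entry))
		nr = 1UL << xas_get_order(&xas);
	rcu_read_unlock();
	return nr;
}
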
@@ -1698,13 +1661,13 @@ try_split:
}
/*
- * The delete_from_swap_cache() below could be left for
+ * The swap_cache_del_folio() below could be left for
* shrink_folio_list()'s folio_free_swap() to dispose of;
* but I'm a little nervous about letting this folio out of
* shmem_writeout() in a hybrid half-tmpfs-half-swap state
* e.g. folio_mapping(folio) might give an unexpected answer.
*/
- delete_from_swap_cache(folio);
+ swap_cache_del_folio(folio);
goto redirty;
}
if (nr_pages > 1)
@@ -1817,7 +1780,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
vm_flags_t vm_flags = vma ? vma->vm_flags : 0;
unsigned int global_orders;
- if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags)))
+ if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force)))
return 0;
global_orders = shmem_huge_global_enabled(inode, index, write_end,
@@ -2082,7 +2045,7 @@ retry:
new->swap = entry;
memcg1_swapin(entry, nr_pages);
- shadow = get_shadow_from_swap_cache(entry);
+ shadow = swap_cache_get_shadow(entry);
if (shadow)
workingset_refault(new, shadow);
folio_add_lru(new);
@@ -2120,13 +2083,11 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index,
struct vm_area_struct *vma)
{
+ struct swap_cluster_info *ci;
struct folio *new, *old = *foliop;
swp_entry_t entry = old->swap;
- struct address_space *swap_mapping = swap_address_space(entry);
- pgoff_t swap_index = swap_cache_index(entry);
- XA_STATE(xas, &swap_mapping->i_pages, swap_index);
int nr_pages = folio_nr_pages(old);
- int error = 0, i;
+ int error = 0;
/*
* We have arrived here because our zones are constrained, so don't
@@ -2155,38 +2116,15 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
new->swap = entry;
folio_set_swapcache(new);
- /* Swap cache still stores N entries instead of a high-order entry */
- xa_lock_irq(&swap_mapping->i_pages);
- for (i = 0; i < nr_pages; i++) {
- void *item = xas_load(&xas);
+ ci = swap_cluster_get_and_lock_irq(old);
+ __swap_cache_replace_folio(ci, old, new);
+ mem_cgroup_replace_folio(old, new);
+ shmem_update_stats(new, nr_pages);
+ shmem_update_stats(old, -nr_pages);
+ swap_cluster_unlock_irq(ci);
- if (item != old) {
- error = -ENOENT;
- break;
- }
-
- xas_store(&xas, new);
- xas_next(&xas);
- }
- if (!error) {
- mem_cgroup_replace_folio(old, new);
- shmem_update_stats(new, nr_pages);
- shmem_update_stats(old, -nr_pages);
- }
- xa_unlock_irq(&swap_mapping->i_pages);
-
- if (unlikely(error)) {
- /*
- * Is this possible? I think not, now that our callers
- * check both the swapcache flag and folio->private
- * after getting the folio lock; but be defensive.
- * Reverse old to newpage for clear and free.
- */
- old = new;
- } else {
- folio_add_lru(new);
- *foliop = new;
- }
+ folio_add_lru(new);
+ *foliop = new;
folio_clear_swapcache(old);
old->private = NULL;
@@ -2220,7 +2158,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
nr_pages = folio_nr_pages(folio);
folio_wait_writeback(folio);
if (!skip_swapcache)
- delete_from_swap_cache(folio);
+ swap_cache_del_folio(folio);
/*
* Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
* won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
@@ -2235,7 +2173,7 @@ static int shmem_split_large_entry(struct inode *inode, pgoff_t index,
{
struct address_space *mapping = inode->i_mapping;
XA_STATE_ORDER(xas, &mapping->i_pages, index, 0);
- int split_order = 0, entry_order;
+ int split_order = 0;
int i;
/* Convert user data gfp flags to xarray node gfp flags */
@@ -2253,15 +2191,12 @@ static int shmem_split_large_entry(struct inode *inode, pgoff_t index,
goto unlock;
}
- entry_order = xas_get_order(&xas);
-
- if (!entry_order)
+ cur_order = xas_get_order(&xas);
+ if (!cur_order)
goto unlock;
/* Try to split large swap entry in pagecache */
- cur_order = entry_order;
- swap_index = round_down(index, 1 << entry_order);
-
+ swap_index = round_down(index, 1 << cur_order);
split_order = xas_try_split_min_order(cur_order);
while (cur_order > 0) {
@@ -2354,7 +2289,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
}
/* Look it up and read it in.. */
- folio = swap_cache_get_folio(swap, NULL, 0);
+ folio = swap_cache_get_folio(swap);
if (!folio) {
if (data_race(si->flags & SWP_SYNCHRONOUS_IO)) {
/* Direct swapin skipping swap cache & readahead */
@@ -2379,6 +2314,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
count_vm_event(PGMAJFAULT);
count_memcg_event_mm(fault_mm, PGMAJFAULT);
}
+ } else {
+ swap_update_readahead(folio, NULL, 0);
}
if (order > folio_order(folio)) {
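
Together with the swap_cache_get_folio(swap) change above, the lookup itself no longer touches readahead state, so a swap cache hit has to feed the readahead heuristic explicitly. A minimal sketch of that caller pattern, assuming the mm-internal "swap.h" header that mm/shmem.c already includes (lookup_swap_hit() is illustrative; shmem passes NULL/0 because it has no faulting VMA at this point):

#include <linux/swap.h>
#include "swap.h"	/* mm-internal header, as mm/shmem.c uses */

/* Illustrative caller: look up the swap cache and, on a hit, update the
 * swap readahead statistics by hand. */
static struct folio *lookup_swap_hit(swp_entry_t swap,
				     struct vm_area_struct *vma,
				     unsigned long addr)
{
	struct folio *folio = swap_cache_get_folio(swap);

	if (folio)
		swap_update_readahead(folio, vma, addr);
	return folio;
}
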
@@ -2430,7 +2367,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
goto failed;
}
folio_wait_writeback(folio);
- nr_pages = folio_nr_pages(folio);
/*
* Some architectures may have to restore extra metadata to the
@@ -2458,7 +2394,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
folio->swap.val = 0;
swapcache_clear(si, swap, nr_pages);
} else {
- delete_from_swap_cache(folio);
+ swap_cache_del_folio(folio);
}
folio_mark_dirty(folio);
swap_free_nr(swap, nr_pages);
@@ -5081,7 +5017,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_flags |= SB_NOUSER;
}
sb->s_export_op = &shmem_export_ops;
- sb->s_flags |= SB_NOSEC | SB_I_VERSION;
+ sb->s_flags |= SB_NOSEC;
#if IS_ENABLED(CONFIG_UNICODE)
if (!ctx->encoding && ctx->strict_encoding) {
@@ -5341,7 +5277,7 @@ static const struct super_operations shmem_ops = {
.get_dquots = shmem_get_dquots,
#endif
.evict_inode = shmem_evict_inode,
- .drop_inode = generic_delete_inode,
+ .drop_inode = inode_just_drop,
.put_super = shmem_put_super,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
.nr_cached_objects = shmem_unused_huge_count,
@@ -5385,6 +5321,9 @@ int shmem_init_fs_context(struct fs_context *fc)
fc->fs_private = ctx;
fc->ops = &shmem_fs_context_ops;
+#ifdef CONFIG_TMPFS
+ fc->sb_flags |= SB_I_VERSION;
+#endif
return 0;
}
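
The last two hunks move SB_I_VERSION off the superblock setup in shmem_fill_super() and onto fc->sb_flags in shmem_init_fs_context(), so it now arrives as a tmpfs (CONFIG_TMPFS) mount-time default applied when the superblock is created. A minimal, hypothetical sketch of the same pattern for an unrelated filesystem (all examplefs_* names are made up):

#include <linux/fs.h>
#include <linux/fs_context.h>

static const struct fs_context_operations examplefs_context_ops = {
	/* .parse_param, .get_tree, ... elided in this sketch */
};

/* Hypothetical init_fs_context: default superblock flags are set on
 * fc->sb_flags so they are applied when the superblock is created. */
static int examplefs_init_fs_context(struct fs_context *fc)
{
	fc->ops = &examplefs_context_ops;
	fc->sb_flags |= SB_I_VERSION;
	return 0;
}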