Diffstat (limited to 'mm')
-rw-r--r--  mm/Makefile       1
-rw-r--r--  mm/internal.h     6
-rw-r--r--  mm/memblock.c    93
-rw-r--r--  mm/memfd_luo.c  529
-rw-r--r--  mm/shmem.c       49
5 files changed, 617 insertions, 61 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 00ceb2418b64..2d0570a16e5b 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_NUMA) += memory-tiers.o
obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
+obj-$(CONFIG_LIVEUPDATE) += memfd_luo.o
obj-$(CONFIG_MEMCG_V1) += memcontrol-v1.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
ifdef CONFIG_SWAP
diff --git a/mm/internal.h b/mm/internal.h
index 89790def1bae..e430da900430 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1582,6 +1582,12 @@ void __meminit __init_page_from_nid(unsigned long pfn, int nid);
unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
int priority);
+int shmem_add_to_page_cache(struct folio *folio,
+ struct address_space *mapping,
+ pgoff_t index, void *expected, gfp_t gfp);
+int shmem_inode_acct_blocks(struct inode *inode, long pages);
+bool shmem_recalc_inode(struct inode *inode, long alloced, long swapped);
+
#ifdef CONFIG_SHRINKER_DEBUG
static inline __printf(2, 0) int shrinker_debugfs_name_alloc(
struct shrinker *shrinker, const char *fmt, va_list ap)
diff --git a/mm/memblock.c b/mm/memblock.c
index f0f2dc66e9a2..905d06b16348 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2445,60 +2445,59 @@ int reserve_mem_release_by_name(const char *name)
#define MEMBLOCK_KHO_FDT "memblock"
#define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1"
#define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1"
-static struct page *kho_fdt;
-static int reserve_mem_kho_finalize(struct kho_serialization *ser)
+static int __init reserved_mem_preserve(void)
{
- int err = 0, i;
+ unsigned int nr_preserved = 0;
+ int err;
- for (i = 0; i < reserved_mem_count; i++) {
+ for (unsigned int i = 0; i < reserved_mem_count; i++, nr_preserved++) {
struct reserve_mem_table *map = &reserved_mem_table[i];
struct page *page = phys_to_page(map->start);
unsigned int nr_pages = map->size >> PAGE_SHIFT;
- err |= kho_preserve_pages(page, nr_pages);
+ err = kho_preserve_pages(page, nr_pages);
+ if (err)
+ goto err_unpreserve;
}
- err |= kho_preserve_folio(page_folio(kho_fdt));
- err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
+ return 0;
- return notifier_from_errno(err);
-}
+err_unpreserve:
+ for (unsigned int i = 0; i < nr_preserved; i++) {
+ struct reserve_mem_table *map = &reserved_mem_table[i];
+ struct page *page = phys_to_page(map->start);
+ unsigned int nr_pages = map->size >> PAGE_SHIFT;
-static int reserve_mem_kho_notifier(struct notifier_block *self,
- unsigned long cmd, void *v)
-{
- switch (cmd) {
- case KEXEC_KHO_FINALIZE:
- return reserve_mem_kho_finalize((struct kho_serialization *)v);
- case KEXEC_KHO_ABORT:
- return NOTIFY_DONE;
- default:
- return NOTIFY_BAD;
+ kho_unpreserve_pages(page, nr_pages);
}
-}
-static struct notifier_block reserve_mem_kho_nb = {
- .notifier_call = reserve_mem_kho_notifier,
-};
+ return err;
+}
static int __init prepare_kho_fdt(void)
{
- int err = 0, i;
+ struct page *fdt_page;
void *fdt;
+ int err;
- kho_fdt = alloc_page(GFP_KERNEL);
- if (!kho_fdt)
- return -ENOMEM;
+ fdt_page = alloc_page(GFP_KERNEL);
+ if (!fdt_page) {
+ err = -ENOMEM;
+ goto err_report;
+ }
- fdt = page_to_virt(kho_fdt);
+ fdt = page_to_virt(fdt_page);
+ err = kho_preserve_pages(fdt_page, 1);
+ if (err)
+ goto err_free_fdt;
err |= fdt_create(fdt, PAGE_SIZE);
err |= fdt_finish_reservemap(fdt);
-
err |= fdt_begin_node(fdt, "");
err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);
- for (i = 0; i < reserved_mem_count; i++) {
+
+ for (unsigned int i = 0; !err && i < reserved_mem_count; i++) {
struct reserve_mem_table *map = &reserved_mem_table[i];
err |= fdt_begin_node(fdt, map->name);
@@ -2508,14 +2507,29 @@ static int __init prepare_kho_fdt(void)
err |= fdt_end_node(fdt);
}
err |= fdt_end_node(fdt);
-
err |= fdt_finish(fdt);
- if (err) {
- pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
- put_page(kho_fdt);
- kho_fdt = NULL;
- }
+ if (err)
+ goto err_unpreserve_fdt;
+
+ err = kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
+ if (err)
+ goto err_unpreserve_fdt;
+
+ err = reserved_mem_preserve();
+ if (err)
+ goto err_remove_subtree;
+
+ return 0;
+
+err_remove_subtree:
+ kho_remove_subtree(fdt);
+err_unpreserve_fdt:
+ kho_unpreserve_pages(fdt_page, 1);
+err_free_fdt:
+ put_page(fdt_page);
+err_report:
+ pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
return err;
}
@@ -2530,13 +2544,6 @@ static int __init reserve_mem_init(void)
err = prepare_kho_fdt();
if (err)
return err;
-
- err = register_kho_notifier(&reserve_mem_kho_nb);
- if (err) {
- put_page(kho_fdt);
- kho_fdt = NULL;
- }
-
return err;
}
late_initcall(reserve_mem_init);
diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c
new file mode 100644
index 000000000000..4f6ba63b4310
--- /dev/null
+++ b/mm/memfd_luo.c
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * Copyright (C) 2025 Amazon.com Inc. or its affiliates.
+ * Pratyush Yadav <ptyadav@amazon.de>
+ */
+
+/**
+ * DOC: Memfd Preservation via LUO
+ *
+ * Overview
+ * ========
+ *
+ * Memory file descriptors (memfd) can be preserved over a kexec using the Live
+ * Update Orchestrator (LUO) file preservation. This allows userspace to
+ * transfer its memory contents to the next kernel after a kexec.
+ *
+ * The preservation is not intended to be transparent. Only select properties of
+ * the file are preserved. All others are reset to their defaults. The
+ * preserved properties are described below.
+ *
+ * .. note::
+ * The LUO API is not yet stable, so the preserved properties of a memfd
+ * are also unstable and subject to backward-incompatible changes.
+ *
+ * .. note::
+ * Currently, memfds backed by hugetlb are not supported. Memfds created
+ * with ``MFD_HUGETLB`` are rejected.
+ *
+ * Preserved Properties
+ * ====================
+ *
+ * The following properties of the memfd are preserved across kexec:
+ *
+ * File Contents
+ * All data stored in the file is preserved.
+ *
+ * File Size
+ * The size of the file is preserved. Holes in the file are filled by
+ * allocating pages for them during preservation.
+ *
+ * File Position
+ * The current file position is preserved, allowing applications to continue
+ * reading/writing from their last position (see the sketch below).
+ *
+ * File Status Flags
+ * Memfds are always opened with ``O_RDWR`` and ``O_LARGEFILE``; these
+ * flags are preserved.
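+ *
+ * As a sketch of how the preserved position can be checked after retrieval
+ * (``fd`` here stands for the restored descriptor)::
+ *
+ *	off_t pos = lseek(fd, 0, SEEK_CUR);	/* offset saved at preserve time */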
+ *
+ * Non-Preserved Properties
+ * ========================
+ *
+ * All properties which are not preserved must be assumed to be reset to
+ * their defaults. This section describes some of the properties most likely
+ * to be of note.
+ *
+ * ``FD_CLOEXEC`` flag
+ * A memfd can be created with the ``MFD_CLOEXEC`` flag that sets the
+ * ``FD_CLOEXEC`` on the file. This flag is not preserved and must be set
+ * again after restore via ``fcntl()``.
+ *
+ * Seals
+ * File seals are not preserved. The file is unsealed on restore and if
+ * needed, must be sealed again via ``fcntl()``.
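+ *
+ * A minimal sketch of re-applying both after retrieval, assuming the memfd
+ * was originally created with ``MFD_CLOEXEC`` and sealed against resizing
+ * (the descriptor name ``fd`` is illustrative)::
+ *
+ *	fcntl(fd, F_SETFD, FD_CLOEXEC);
+ *	fcntl(fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK);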
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bits.h>
+#include <linux/err.h>
+#include <linux/file.h>
+#include <linux/io.h>
+#include <linux/kexec_handover.h>
+#include <linux/kho/abi/memfd.h>
+#include <linux/liveupdate.h>
+#include <linux/shmem_fs.h>
+#include <linux/vmalloc.h>
+#include "internal.h"
+
+static int memfd_luo_preserve_folios(struct file *file,
+ struct kho_vmalloc *kho_vmalloc,
+ struct memfd_luo_folio_ser **out_folios_ser,
+ u64 *nr_foliosp)
+{
+ struct inode *inode = file_inode(file);
+ struct memfd_luo_folio_ser *folios_ser;
+ unsigned int max_folios;
+ long i, size, nr_pinned;
+ struct folio **folios;
+ int err = -EINVAL;
+ pgoff_t offset;
+ u64 nr_folios;
+
+ size = i_size_read(inode);
+ /*
+ * If the file has zero size, then the folios and nr_folios properties
+ * are not set.
+ */
+ if (!size) {
+ *nr_foliosp = 0;
+ *out_folios_ser = NULL;
+ memset(kho_vmalloc, 0, sizeof(*kho_vmalloc));
+ return 0;
+ }
+
+ /*
+	 * Guess the number of folios based on the inode size. The real number
+	 * might end up smaller if there are higher-order folios.
+ */
+ max_folios = PAGE_ALIGN(size) / PAGE_SIZE;
+ folios = kvmalloc_array(max_folios, sizeof(*folios), GFP_KERNEL);
+ if (!folios)
+ return -ENOMEM;
+
+ /*
+ * Pin the folios so they don't move around behind our back. This also
+ * ensures none of the folios are in CMA -- which ensures they don't
+ * fall in KHO scratch memory. It also moves swapped out folios back to
+ * memory.
+ *
+ * A side effect of doing this is that it allocates a folio for all
+ * indices in the file. This might waste memory on sparse memfds. If
+ * that is really a problem in the future, we can have a
+ * memfd_pin_folios() variant that does not allocate a page on empty
+ * slots.
+ */
+ nr_pinned = memfd_pin_folios(file, 0, size - 1, folios, max_folios,
+ &offset);
+ if (nr_pinned < 0) {
+ err = nr_pinned;
+ pr_err("failed to pin folios: %d\n", err);
+ goto err_free_folios;
+ }
+ nr_folios = nr_pinned;
+
+ folios_ser = vcalloc(nr_folios, sizeof(*folios_ser));
+ if (!folios_ser) {
+ err = -ENOMEM;
+ goto err_unpin;
+ }
+
+ for (i = 0; i < nr_folios; i++) {
+ struct memfd_luo_folio_ser *pfolio = &folios_ser[i];
+ struct folio *folio = folios[i];
+ unsigned int flags = 0;
+
+ err = kho_preserve_folio(folio);
+ if (err)
+ goto err_unpreserve;
+
+ if (folio_test_dirty(folio))
+ flags |= MEMFD_LUO_FOLIO_DIRTY;
+ if (folio_test_uptodate(folio))
+ flags |= MEMFD_LUO_FOLIO_UPTODATE;
+
+ pfolio->pfn = folio_pfn(folio);
+ pfolio->flags = flags;
+ pfolio->index = folio->index;
+ }
+
+ err = kho_preserve_vmalloc(folios_ser, kho_vmalloc);
+ if (err)
+ goto err_unpreserve;
+
+ kvfree(folios);
+ *nr_foliosp = nr_folios;
+ *out_folios_ser = folios_ser;
+
+ /*
+ * Note: folios_ser is purposely not freed here. It is preserved
+ * memory (via KHO). In the 'unpreserve' path, we use the vmap pointer
+ * that is passed via private_data.
+ */
+ return 0;
+
+err_unpreserve:
+ for (i = i - 1; i >= 0; i--)
+ kho_unpreserve_folio(folios[i]);
+ vfree(folios_ser);
+err_unpin:
+ unpin_folios(folios, nr_folios);
+err_free_folios:
+ kvfree(folios);
+
+ return err;
+}
+
+static void memfd_luo_unpreserve_folios(struct kho_vmalloc *kho_vmalloc,
+ struct memfd_luo_folio_ser *folios_ser,
+ u64 nr_folios)
+{
+ long i;
+
+ if (!nr_folios)
+ return;
+
+ kho_unpreserve_vmalloc(kho_vmalloc);
+
+ for (i = 0; i < nr_folios; i++) {
+ const struct memfd_luo_folio_ser *pfolio = &folios_ser[i];
+ struct folio *folio;
+
+ if (!pfolio->pfn)
+ continue;
+
+ folio = pfn_folio(pfolio->pfn);
+
+ kho_unpreserve_folio(folio);
+ unpin_folio(folio);
+ }
+
+ vfree(folios_ser);
+}
+
+static int memfd_luo_preserve(struct liveupdate_file_op_args *args)
+{
+ struct inode *inode = file_inode(args->file);
+ struct memfd_luo_folio_ser *folios_ser;
+ struct memfd_luo_ser *ser;
+ u64 nr_folios;
+ int err = 0;
+
+ inode_lock(inode);
+ shmem_freeze(inode, true);
+
+ /* Allocate the main serialization structure in preserved memory */
+ ser = kho_alloc_preserve(sizeof(*ser));
+ if (IS_ERR(ser)) {
+ err = PTR_ERR(ser);
+ goto err_unlock;
+ }
+
+ ser->pos = args->file->f_pos;
+ ser->size = i_size_read(inode);
+
+ err = memfd_luo_preserve_folios(args->file, &ser->folios,
+ &folios_ser, &nr_folios);
+ if (err)
+ goto err_free_ser;
+
+ ser->nr_folios = nr_folios;
+ inode_unlock(inode);
+
+ args->private_data = folios_ser;
+ args->serialized_data = virt_to_phys(ser);
+
+ return 0;
+
+err_free_ser:
+ kho_unpreserve_free(ser);
+err_unlock:
+ shmem_freeze(inode, false);
+ inode_unlock(inode);
+ return err;
+}
+
+static int memfd_luo_freeze(struct liveupdate_file_op_args *args)
+{
+ struct memfd_luo_ser *ser;
+
+ if (WARN_ON_ONCE(!args->serialized_data))
+ return -EINVAL;
+
+ ser = phys_to_virt(args->serialized_data);
+
+ /*
+ * The pos might have changed since prepare. Everything else stays the
+ * same.
+ */
+ ser->pos = args->file->f_pos;
+
+ return 0;
+}
+
+static void memfd_luo_unpreserve(struct liveupdate_file_op_args *args)
+{
+ struct inode *inode = file_inode(args->file);
+ struct memfd_luo_ser *ser;
+
+ if (WARN_ON_ONCE(!args->serialized_data))
+ return;
+
+ inode_lock(inode);
+ shmem_freeze(inode, false);
+
+ ser = phys_to_virt(args->serialized_data);
+
+ memfd_luo_unpreserve_folios(&ser->folios, args->private_data,
+ ser->nr_folios);
+
+ kho_unpreserve_free(ser);
+ inode_unlock(inode);
+}
+
+static void memfd_luo_discard_folios(const struct memfd_luo_folio_ser *folios_ser,
+ u64 nr_folios)
+{
+ u64 i;
+
+ for (i = 0; i < nr_folios; i++) {
+ const struct memfd_luo_folio_ser *pfolio = &folios_ser[i];
+ struct folio *folio;
+ phys_addr_t phys;
+
+ if (!pfolio->pfn)
+ continue;
+
+ phys = PFN_PHYS(pfolio->pfn);
+ folio = kho_restore_folio(phys);
+ if (!folio) {
+ pr_warn_ratelimited("Unable to restore folio at physical address: %llx\n",
+ phys);
+ continue;
+ }
+
+ folio_put(folio);
+ }
+}
+
+static void memfd_luo_finish(struct liveupdate_file_op_args *args)
+{
+ struct memfd_luo_folio_ser *folios_ser;
+ struct memfd_luo_ser *ser;
+
+ if (args->retrieved)
+ return;
+
+ ser = phys_to_virt(args->serialized_data);
+ if (!ser)
+ return;
+
+ if (ser->nr_folios) {
+ folios_ser = kho_restore_vmalloc(&ser->folios);
+ if (!folios_ser)
+ goto out;
+
+ memfd_luo_discard_folios(folios_ser, ser->nr_folios);
+ vfree(folios_ser);
+ }
+
+out:
+ kho_restore_free(ser);
+}
+
+static int memfd_luo_retrieve_folios(struct file *file,
+ struct memfd_luo_folio_ser *folios_ser,
+ u64 nr_folios)
+{
+ struct inode *inode = file_inode(file);
+ struct address_space *mapping = inode->i_mapping;
+ struct folio *folio;
+ int err = -EIO;
+ long i;
+
+ for (i = 0; i < nr_folios; i++) {
+ const struct memfd_luo_folio_ser *pfolio = &folios_ser[i];
+ phys_addr_t phys;
+ u64 index;
+ int flags;
+
+ if (!pfolio->pfn)
+ continue;
+
+ phys = PFN_PHYS(pfolio->pfn);
+ folio = kho_restore_folio(phys);
+ if (!folio) {
+ pr_err("Unable to restore folio at physical address: %llx\n",
+ phys);
+ goto put_folios;
+ }
+ index = pfolio->index;
+ flags = pfolio->flags;
+
+ /* Set up the folio for insertion. */
+ __folio_set_locked(folio);
+ __folio_set_swapbacked(folio);
+
+ err = mem_cgroup_charge(folio, NULL, mapping_gfp_mask(mapping));
+ if (err) {
+ pr_err("shmem: failed to charge folio index %ld: %d\n",
+ i, err);
+ goto unlock_folio;
+ }
+
+ err = shmem_add_to_page_cache(folio, mapping, index, NULL,
+ mapping_gfp_mask(mapping));
+ if (err) {
+ pr_err("shmem: failed to add to page cache folio index %ld: %d\n",
+ i, err);
+ goto unlock_folio;
+ }
+
+ if (flags & MEMFD_LUO_FOLIO_UPTODATE)
+ folio_mark_uptodate(folio);
+ if (flags & MEMFD_LUO_FOLIO_DIRTY)
+ folio_mark_dirty(folio);
+
+ err = shmem_inode_acct_blocks(inode, 1);
+ if (err) {
+ pr_err("shmem: failed to account folio index %ld: %d\n",
+ i, err);
+ goto unlock_folio;
+ }
+
+ shmem_recalc_inode(inode, 1, 0);
+ folio_add_lru(folio);
+ folio_unlock(folio);
+ folio_put(folio);
+ }
+
+ return 0;
+
+unlock_folio:
+ folio_unlock(folio);
+ folio_put(folio);
+put_folios:
+ /*
+ * Note: don't free the folios already added to the file. They will be
+ * freed when the file is freed. Free the ones not added yet here.
+ */
+ for (long j = i + 1; j < nr_folios; j++) {
+ const struct memfd_luo_folio_ser *pfolio = &folios_ser[j];
+
+		folio = kho_restore_folio(PFN_PHYS(pfolio->pfn));
+ if (folio)
+ folio_put(folio);
+ }
+
+ return err;
+}
+
+static int memfd_luo_retrieve(struct liveupdate_file_op_args *args)
+{
+ struct memfd_luo_folio_ser *folios_ser;
+ struct memfd_luo_ser *ser;
+ struct file *file;
+ int err;
+
+ ser = phys_to_virt(args->serialized_data);
+ if (!ser)
+ return -EINVAL;
+
+ file = shmem_file_setup("", 0, VM_NORESERVE);
+
+ if (IS_ERR(file)) {
+ pr_err("failed to setup file: %pe\n", file);
+ return PTR_ERR(file);
+ }
+
+ vfs_setpos(file, ser->pos, MAX_LFS_FILESIZE);
+ file->f_inode->i_size = ser->size;
+
+ if (ser->nr_folios) {
+ folios_ser = kho_restore_vmalloc(&ser->folios);
+ if (!folios_ser) {
+ err = -EINVAL;
+ goto put_file;
+ }
+
+ err = memfd_luo_retrieve_folios(file, folios_ser, ser->nr_folios);
+ vfree(folios_ser);
+ if (err)
+ goto put_file;
+ }
+
+ args->file = file;
+ kho_restore_free(ser);
+
+ return 0;
+
+put_file:
+ fput(file);
+
+ return err;
+}
+
+static bool memfd_luo_can_preserve(struct liveupdate_file_handler *handler,
+ struct file *file)
+{
+ struct inode *inode = file_inode(file);
+
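+	/* Only unlinked (anonymous) shmem files, i.e. memfds, are supported. */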
+ return shmem_file(file) && !inode->i_nlink;
+}
+
+static const struct liveupdate_file_ops memfd_luo_file_ops = {
+ .freeze = memfd_luo_freeze,
+ .finish = memfd_luo_finish,
+ .retrieve = memfd_luo_retrieve,
+ .preserve = memfd_luo_preserve,
+ .unpreserve = memfd_luo_unpreserve,
+ .can_preserve = memfd_luo_can_preserve,
+ .owner = THIS_MODULE,
+};
+
+static struct liveupdate_file_handler memfd_luo_handler = {
+ .ops = &memfd_luo_file_ops,
+ .compatible = MEMFD_LUO_FH_COMPATIBLE,
+};
+
+static int __init memfd_luo_init(void)
+{
+ int err = liveupdate_register_file_handler(&memfd_luo_handler);
+
+ if (err && err != -EOPNOTSUPP) {
+ pr_err("Could not register luo filesystem handler: %pe\n",
+ ERR_PTR(err));
+
+ return err;
+ }
+
+ return 0;
+}
+late_initcall(memfd_luo_init);
diff --git a/mm/shmem.c b/mm/shmem.c
index d578d8e765d7..3f194c9842a8 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -174,20 +174,20 @@ static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
*/
static inline int shmem_acct_size(unsigned long flags, loff_t size)
{
- return (flags & VM_NORESERVE) ?
+ return (flags & SHMEM_F_NORESERVE) ?
0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
}
static inline void shmem_unacct_size(unsigned long flags, loff_t size)
{
- if (!(flags & VM_NORESERVE))
+ if (!(flags & SHMEM_F_NORESERVE))
vm_unacct_memory(VM_ACCT(size));
}
static inline int shmem_reacct_size(unsigned long flags,
loff_t oldsize, loff_t newsize)
{
- if (!(flags & VM_NORESERVE)) {
+ if (!(flags & SHMEM_F_NORESERVE)) {
if (VM_ACCT(newsize) > VM_ACCT(oldsize))
return security_vm_enough_memory_mm(current->mm,
VM_ACCT(newsize) - VM_ACCT(oldsize));
@@ -205,7 +205,7 @@ static inline int shmem_reacct_size(unsigned long flags,
*/
static inline int shmem_acct_blocks(unsigned long flags, long pages)
{
- if (!(flags & VM_NORESERVE))
+ if (!(flags & SHMEM_F_NORESERVE))
return 0;
return security_vm_enough_memory_mm(current->mm,
@@ -214,11 +214,11 @@ static inline int shmem_acct_blocks(unsigned long flags, long pages)
static inline void shmem_unacct_blocks(unsigned long flags, long pages)
{
- if (flags & VM_NORESERVE)
+ if (flags & SHMEM_F_NORESERVE)
vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
}
-static int shmem_inode_acct_blocks(struct inode *inode, long pages)
+int shmem_inode_acct_blocks(struct inode *inode, long pages)
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
@@ -434,7 +434,7 @@ static void shmem_free_inode(struct super_block *sb, size_t freed_ispace)
*
* Return: true if swapped was incremented from 0, for shmem_writeout().
*/
-static bool shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
+bool shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
{
struct shmem_inode_info *info = SHMEM_I(inode);
bool first_swapped = false;
@@ -878,9 +878,9 @@ static void shmem_update_stats(struct folio *folio, int nr_pages)
/*
* Somewhat like filemap_add_folio, but error if expected item has gone.
*/
-static int shmem_add_to_page_cache(struct folio *folio,
- struct address_space *mapping,
- pgoff_t index, void *expected, gfp_t gfp)
+int shmem_add_to_page_cache(struct folio *folio,
+ struct address_space *mapping,
+ pgoff_t index, void *expected, gfp_t gfp)
{
XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
unsigned long nr = folio_nr_pages(folio);
@@ -1314,6 +1314,8 @@ static int shmem_setattr(struct mnt_idmap *idmap,
return -EPERM;
if (newsize != oldsize) {
+ if (info->flags & SHMEM_F_MAPPING_FROZEN)
+ return -EPERM;
error = shmem_reacct_size(SHMEM_I(inode)->flags,
oldsize, newsize);
if (error)
@@ -1568,7 +1570,7 @@ int shmem_writeout(struct folio *folio, struct swap_iocb **plug,
int nr_pages;
bool split = false;
- if ((info->flags & VM_LOCKED) || sbinfo->noswap)
+ if ((info->flags & SHMEM_F_LOCKED) || sbinfo->noswap)
goto redirty;
if (!total_swap_pages)
@@ -2926,15 +2928,15 @@ int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
* ipc_lock_object() when called from shmctl_do_lock(),
* no serialization needed when called from shm_destroy().
*/
- if (lock && !(info->flags & VM_LOCKED)) {
+ if (lock && !(info->flags & SHMEM_F_LOCKED)) {
if (!user_shm_lock(inode->i_size, ucounts))
goto out_nomem;
- info->flags |= VM_LOCKED;
+ info->flags |= SHMEM_F_LOCKED;
mapping_set_unevictable(file->f_mapping);
}
- if (!lock && (info->flags & VM_LOCKED) && ucounts) {
+ if (!lock && (info->flags & SHMEM_F_LOCKED) && ucounts) {
user_shm_unlock(inode->i_size, ucounts);
- info->flags &= ~VM_LOCKED;
+ info->flags &= ~SHMEM_F_LOCKED;
mapping_clear_unevictable(file->f_mapping);
}
retval = 0;
@@ -3079,7 +3081,7 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
spin_lock_init(&info->lock);
atomic_set(&info->stop_eviction, 0);
info->seals = F_SEAL_SEAL;
- info->flags = flags & VM_NORESERVE;
+ info->flags = (flags & VM_NORESERVE) ? SHMEM_F_NORESERVE : 0;
info->i_crtime = inode_get_mtime(inode);
info->fsflags = (dir == NULL) ? 0 :
SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
@@ -3306,6 +3308,10 @@ shmem_write_begin(const struct kiocb *iocb, struct address_space *mapping,
return -EPERM;
}
+ if (unlikely((info->flags & SHMEM_F_MAPPING_FROZEN) &&
+ pos + len > inode->i_size))
+ return -EPERM;
+
ret = shmem_get_folio(inode, index, pos + len, &folio, SGP_WRITE);
if (ret)
return ret;
@@ -3679,6 +3685,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
inode_lock(inode);
+ if (info->flags & SHMEM_F_MAPPING_FROZEN) {
+ error = -EPERM;
+ goto out;
+ }
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
struct address_space *mapping = file->f_mapping;
loff_t unmap_start = round_up(offset, PAGE_SIZE);
@@ -5799,8 +5810,10 @@ static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
/* common code */
static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
- loff_t size, unsigned long flags, unsigned int i_flags)
+ loff_t size, unsigned long vm_flags,
+ unsigned int i_flags)
{
+ unsigned long flags = (vm_flags & VM_NORESERVE) ? SHMEM_F_NORESERVE : 0;
struct inode *inode;
struct file *res;
@@ -5817,7 +5830,7 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
return ERR_PTR(-ENOMEM);
inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
- S_IFREG | S_IRWXUGO, 0, flags);
+ S_IFREG | S_IRWXUGO, 0, vm_flags);
if (IS_ERR(inode)) {
shmem_unacct_size(flags, size);
return ERR_CAST(inode);