Diffstat (limited to 'mm')
-rw-r--r--  mm/Makefile    |   1
-rw-r--r--  mm/internal.h  |   6
-rw-r--r--  mm/memblock.c  |  93
-rw-r--r--  mm/memfd_luo.c | 516
-rw-r--r--  mm/shmem.c     |  49
5 files changed, 604 insertions(+), 61 deletions(-)
diff --git a/mm/Makefile b/mm/Makefile
index 00ceb2418b64..2d0570a16e5b 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_NUMA) += memory-tiers.o
 obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o
 obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
 obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
+obj-$(CONFIG_LIVEUPDATE) += memfd_luo.o
 obj-$(CONFIG_MEMCG_V1) += memcontrol-v1.o
 obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
 ifdef CONFIG_SWAP
diff --git a/mm/internal.h b/mm/internal.h
index 89790def1bae..e430da900430 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1582,6 +1582,12 @@ void __meminit __init_page_from_nid(unsigned long pfn, int nid);
 unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
 			  int priority);
 
+int shmem_add_to_page_cache(struct folio *folio,
+			    struct address_space *mapping,
+			    pgoff_t index, void *expected, gfp_t gfp);
+int shmem_inode_acct_blocks(struct inode *inode, long pages);
+bool shmem_recalc_inode(struct inode *inode, long alloced, long swapped);
+
 #ifdef CONFIG_SHRINKER_DEBUG
 static inline __printf(2, 0) int shrinker_debugfs_name_alloc(
 			struct shrinker *shrinker, const char *fmt, va_list ap)
diff --git a/mm/memblock.c b/mm/memblock.c
index f0f2dc66e9a2..905d06b16348 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2445,60 +2445,59 @@ int reserve_mem_release_by_name(const char *name)
 #define MEMBLOCK_KHO_FDT "memblock"
 #define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1"
 #define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1"
-static struct page *kho_fdt;
 
-static int reserve_mem_kho_finalize(struct kho_serialization *ser)
+static int __init reserved_mem_preserve(void)
 {
-	int err = 0, i;
+	unsigned int nr_preserved = 0;
+	int err;
 
-	for (i = 0; i < reserved_mem_count; i++) {
+	for (unsigned int i = 0; i < reserved_mem_count; i++, nr_preserved++) {
 		struct reserve_mem_table *map = &reserved_mem_table[i];
 		struct page *page = phys_to_page(map->start);
 		unsigned int nr_pages = map->size >> PAGE_SHIFT;
 
-		err |= kho_preserve_pages(page, nr_pages);
+		err = kho_preserve_pages(page, nr_pages);
+		if (err)
+			goto err_unpreserve;
 	}
 
-	err |= kho_preserve_folio(page_folio(kho_fdt));
-	err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
+	return 0;
 
-	return notifier_from_errno(err);
-}
+err_unpreserve:
+	for (unsigned int i = 0; i < nr_preserved; i++) {
+		struct reserve_mem_table *map = &reserved_mem_table[i];
+		struct page *page = phys_to_page(map->start);
+		unsigned int nr_pages = map->size >> PAGE_SHIFT;
 
-static int reserve_mem_kho_notifier(struct notifier_block *self,
-				    unsigned long cmd, void *v)
-{
-	switch (cmd) {
-	case KEXEC_KHO_FINALIZE:
-		return reserve_mem_kho_finalize((struct kho_serialization *)v);
-	case KEXEC_KHO_ABORT:
-		return NOTIFY_DONE;
-	default:
-		return NOTIFY_BAD;
+		kho_unpreserve_pages(page, nr_pages);
 	}
-}
 
-static struct notifier_block reserve_mem_kho_nb = {
-	.notifier_call = reserve_mem_kho_notifier,
-};
+	return err;
+}
 
 static int __init prepare_kho_fdt(void)
 {
-	int err = 0, i;
+	struct page *fdt_page;
 	void *fdt;
+	int err;
 
-	kho_fdt = alloc_page(GFP_KERNEL);
-	if (!kho_fdt)
-		return -ENOMEM;
+	fdt_page = alloc_page(GFP_KERNEL);
+	if (!fdt_page) {
+		err = -ENOMEM;
+		goto err_report;
+	}
 
-	fdt = page_to_virt(kho_fdt);
+	fdt = page_to_virt(fdt_page);
+	err = kho_preserve_pages(fdt_page, 1);
+	if (err)
+		goto err_free_fdt;
 
 	err |= fdt_create(fdt, PAGE_SIZE);
 	err |= fdt_finish_reservemap(fdt);
-
 	err |= fdt_begin_node(fdt, "");
 	err |= fdt_property_string(fdt, "compatible",
				   MEMBLOCK_KHO_NODE_COMPATIBLE);
-	for (i = 0; i < reserved_mem_count; i++) {
+
+	for (unsigned int i = 0; !err && i < reserved_mem_count; i++) {
 		struct reserve_mem_table *map = &reserved_mem_table[i];
 
 		err |= fdt_begin_node(fdt, map->name);
@@ -2508,14 +2507,29 @@ static int __init prepare_kho_fdt(void)
 		err |= fdt_end_node(fdt);
 	}
 	err |= fdt_end_node(fdt);
-
 	err |= fdt_finish(fdt);
-	if (err) {
-		pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
-		put_page(kho_fdt);
-		kho_fdt = NULL;
-	}
+	if (err)
+		goto err_unpreserve_fdt;
+
+	err = kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
+	if (err)
+		goto err_unpreserve_fdt;
+
+	err = reserved_mem_preserve();
+	if (err)
+		goto err_remove_subtree;
+
+	return 0;
+
+err_remove_subtree:
+	kho_remove_subtree(fdt);
+err_unpreserve_fdt:
+	kho_unpreserve_pages(fdt_page, 1);
+err_free_fdt:
+	put_page(fdt_page);
+err_report:
+	pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
 	return err;
 }
@@ -2530,13 +2544,6 @@ static int __init reserve_mem_init(void)
 	err = prepare_kho_fdt();
 	if (err)
 		return err;
-
-	err = register_kho_notifier(&reserve_mem_kho_nb);
-	if (err) {
-		put_page(kho_fdt);
-		kho_fdt = NULL;
-	}
-
 	return err;
 }
 late_initcall(reserve_mem_init);
diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c
new file mode 100644
index 000000000000..4f6ba63b4310
--- /dev/null
+++ b/mm/memfd_luo.c
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * Copyright (C) 2025 Amazon.com Inc. or its affiliates.
+ * Pratyush Yadav <ptyadav@amazon.de>
+ */
+
+/**
+ * DOC: Memfd Preservation via LUO
+ *
+ * Overview
+ * ========
+ *
+ * Memory file descriptors (memfd) can be preserved over a kexec using the
+ * Live Update Orchestrator (LUO) file preservation. This allows userspace to
+ * transfer its memory contents to the next kernel after a kexec.
+ *
+ * The preservation is not intended to be transparent. Only select properties
+ * of the file are preserved. All others are reset to default. The preserved
+ * properties are described below.
+ *
+ * .. note::
+ *    The LUO API is not stabilized yet, so the preserved properties of a
+ *    memfd are also not stable and are subject to backwards incompatible
+ *    changes.
+ *
+ * .. note::
+ *    Currently a memfd backed by hugetlb is not supported. Memfds created
+ *    with ``MFD_HUGETLB`` will be rejected.
+ *
+ * Preserved Properties
+ * ====================
+ *
+ * The following properties of the memfd are preserved across kexec:
+ *
+ * File Contents
+ *    All data stored in the file is preserved.
+ *
+ * File Size
+ *    The size of the file is preserved. Holes in the file are filled by
+ *    allocating pages for them during preservation.
+ *
+ * File Position
+ *    The current file position is preserved, allowing applications to
+ *    continue reading/writing from their last position.
+ *
+ * File Status Flags
+ *    memfds are always opened with ``O_RDWR`` and ``O_LARGEFILE``. This
+ *    property is maintained.
+ *
+ * Non-Preserved Properties
+ * ========================
+ *
+ * All properties which are not preserved must be assumed to be reset to
+ * default. This section describes some of those properties which may be of
+ * particular note.
+ *
+ * ``FD_CLOEXEC`` flag
+ *    A memfd can be created with the ``MFD_CLOEXEC`` flag, which sets the
+ *    ``FD_CLOEXEC`` flag on the file descriptor. This flag is not preserved
+ *    and must be set again after restore via ``fcntl()``.
+ *
+ * Seals
+ *    File seals are not preserved.
+ *    The file is unsealed on restore and, if needed, must be sealed again
+ *    via ``fcntl()``.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bits.h>
+#include <linux/err.h>
+#include <linux/file.h>
+#include <linux/io.h>
+#include <linux/kexec_handover.h>
+#include <linux/kho/abi/memfd.h>
+#include <linux/liveupdate.h>
+#include <linux/shmem_fs.h>
+#include <linux/vmalloc.h>
+
+#include "internal.h"
+
+static int memfd_luo_preserve_folios(struct file *file,
+				     struct kho_vmalloc *kho_vmalloc,
+				     struct memfd_luo_folio_ser **out_folios_ser,
+				     u64 *nr_foliosp)
+{
+	struct inode *inode = file_inode(file);
+	struct memfd_luo_folio_ser *folios_ser;
+	unsigned int max_folios;
+	long i, size, nr_pinned;
+	struct folio **folios;
+	int err = -EINVAL;
+	pgoff_t offset;
+	u64 nr_folios;
+
+	size = i_size_read(inode);
+	/*
+	 * If the file has zero size, then the folios and nr_folios properties
+	 * are not set.
+	 */
+	if (!size) {
+		*nr_foliosp = 0;
+		*out_folios_ser = NULL;
+		memset(kho_vmalloc, 0, sizeof(*kho_vmalloc));
+		return 0;
+	}
+
+	/*
+	 * Guess the number of folios based on the inode size. The real number
+	 * might end up being smaller if there are higher-order folios.
+	 */
+	max_folios = PAGE_ALIGN(size) / PAGE_SIZE;
+	folios = kvmalloc_array(max_folios, sizeof(*folios), GFP_KERNEL);
+	if (!folios)
+		return -ENOMEM;
+
+	/*
+	 * Pin the folios so they don't move around behind our back. This also
+	 * ensures none of the folios are in CMA -- which ensures they don't
+	 * fall in KHO scratch memory. It also moves swapped-out folios back
+	 * to memory.
+	 *
+	 * A side effect of doing this is that it allocates a folio for all
+	 * indices in the file. This might waste memory on sparse memfds. If
+	 * that is really a problem in the future, we can have a
+	 * memfd_pin_folios() variant that does not allocate a page on empty
+	 * slots.
+	 */
+	nr_pinned = memfd_pin_folios(file, 0, size - 1, folios, max_folios,
+				     &offset);
+	if (nr_pinned < 0) {
+		err = nr_pinned;
+		pr_err("failed to pin folios: %d\n", err);
+		goto err_free_folios;
+	}
+	nr_folios = nr_pinned;
+
+	folios_ser = vcalloc(nr_folios, sizeof(*folios_ser));
+	if (!folios_ser) {
+		err = -ENOMEM;
+		goto err_unpin;
+	}
+
+	for (i = 0; i < nr_folios; i++) {
+		struct memfd_luo_folio_ser *pfolio = &folios_ser[i];
+		struct folio *folio = folios[i];
+		unsigned int flags = 0;
+
+		err = kho_preserve_folio(folio);
+		if (err)
+			goto err_unpreserve;
+
+		if (folio_test_dirty(folio))
+			flags |= MEMFD_LUO_FOLIO_DIRTY;
+		if (folio_test_uptodate(folio))
+			flags |= MEMFD_LUO_FOLIO_UPTODATE;
+
+		pfolio->pfn = folio_pfn(folio);
+		pfolio->flags = flags;
+		pfolio->index = folio->index;
+	}
+
+	err = kho_preserve_vmalloc(folios_ser, kho_vmalloc);
+	if (err)
+		goto err_unpreserve;
+
+	kvfree(folios);
+	*nr_foliosp = nr_folios;
+	*out_folios_ser = folios_ser;
+
+	/*
+	 * Note: folios_ser is purposely not freed here. It is preserved
+	 * memory (via KHO). In the 'unpreserve' path, we use the vmap pointer
+	 * that is passed via private_data.
+	 */
+	return 0;
+
+err_unpreserve:
+	for (i = i - 1; i >= 0; i--)
+		kho_unpreserve_folio(folios[i]);
+	vfree(folios_ser);
+err_unpin:
+	unpin_folios(folios, nr_folios);
+err_free_folios:
+	kvfree(folios);
+
+	return err;
+}
+
+static void memfd_luo_unpreserve_folios(struct kho_vmalloc *kho_vmalloc,
+					struct memfd_luo_folio_ser *folios_ser,
+					u64 nr_folios)
+{
+	long i;
+
+	if (!nr_folios)
+		return;
+
+	kho_unpreserve_vmalloc(kho_vmalloc);
+
+	for (i = 0; i < nr_folios; i++) {
+		const struct memfd_luo_folio_ser *pfolio = &folios_ser[i];
+		struct folio *folio;
+
+		if (!pfolio->pfn)
+			continue;
+
+		folio = pfn_folio(pfolio->pfn);
+
+		kho_unpreserve_folio(folio);
+		unpin_folio(folio);
+	}
+
+	vfree(folios_ser);
+}
+
+static int memfd_luo_preserve(struct liveupdate_file_op_args *args)
+{
+	struct inode *inode = file_inode(args->file);
+	struct memfd_luo_folio_ser *folios_ser;
+	struct memfd_luo_ser *ser;
+	u64 nr_folios;
+	int err = 0;
+
+	inode_lock(inode);
+	shmem_freeze(inode, true);
+
+	/* Allocate the main serialization structure in preserved memory. */
+	ser = kho_alloc_preserve(sizeof(*ser));
+	if (IS_ERR(ser)) {
+		err = PTR_ERR(ser);
+		goto err_unlock;
+	}
+
+	ser->pos = args->file->f_pos;
+	ser->size = i_size_read(inode);
+
+	err = memfd_luo_preserve_folios(args->file, &ser->folios,
+					&folios_ser, &nr_folios);
+	if (err)
+		goto err_free_ser;
+
+	ser->nr_folios = nr_folios;
+	inode_unlock(inode);
+
+	args->private_data = folios_ser;
+	args->serialized_data = virt_to_phys(ser);
+
+	return 0;
+
+err_free_ser:
+	kho_unpreserve_free(ser);
+err_unlock:
+	shmem_freeze(inode, false);
+	inode_unlock(inode);
+	return err;
+}
+
+static int memfd_luo_freeze(struct liveupdate_file_op_args *args)
+{
+	struct memfd_luo_ser *ser;
+
+	if (WARN_ON_ONCE(!args->serialized_data))
+		return -EINVAL;
+
+	ser = phys_to_virt(args->serialized_data);
+
+	/*
+	 * The position might have changed since preservation. Everything else
+	 * stays the same.
+	 */
+	ser->pos = args->file->f_pos;
+
+	return 0;
+}
+
+static void memfd_luo_unpreserve(struct liveupdate_file_op_args *args)
+{
+	struct inode *inode = file_inode(args->file);
+	struct memfd_luo_ser *ser;
+
+	if (WARN_ON_ONCE(!args->serialized_data))
+		return;
+
+	inode_lock(inode);
+	shmem_freeze(inode, false);
+
+	ser = phys_to_virt(args->serialized_data);
+
+	memfd_luo_unpreserve_folios(&ser->folios, args->private_data,
+				    ser->nr_folios);
+
+	kho_unpreserve_free(ser);
+	inode_unlock(inode);
+}
+
+static void memfd_luo_discard_folios(const struct memfd_luo_folio_ser *folios_ser,
+				     u64 nr_folios)
+{
+	u64 i;
+
+	for (i = 0; i < nr_folios; i++) {
+		const struct memfd_luo_folio_ser *pfolio = &folios_ser[i];
+		struct folio *folio;
+		phys_addr_t phys;
+
+		if (!pfolio->pfn)
+			continue;
+
+		phys = PFN_PHYS(pfolio->pfn);
+		folio = kho_restore_folio(phys);
+		if (!folio) {
+			pr_warn_ratelimited("Unable to restore folio at physical address: %llx\n",
+					    phys);
+			continue;
+		}
+
+		folio_put(folio);
+	}
+}
+
+static void memfd_luo_finish(struct liveupdate_file_op_args *args)
+{
+	struct memfd_luo_folio_ser *folios_ser;
+	struct memfd_luo_ser *ser;
+
+	if (args->retrieved)
+		return;
+
+	ser = phys_to_virt(args->serialized_data);
+	if (!ser)
+		return;
+
+	if (ser->nr_folios) {
+		folios_ser = kho_restore_vmalloc(&ser->folios);
+		if (!folios_ser)
+			goto out;
+
+		memfd_luo_discard_folios(folios_ser, ser->nr_folios);
+		vfree(folios_ser);
+	}
+
+out:
+	kho_restore_free(ser);
+}
+
+static int memfd_luo_retrieve_folios(struct file *file,
+				     struct memfd_luo_folio_ser *folios_ser,
+				     u64 nr_folios)
+{
+	struct inode *inode = file_inode(file);
+	struct address_space *mapping = inode->i_mapping;
+	struct folio *folio;
+	int err = -EIO;
+	long i;
+
+	for (i = 0; i < nr_folios; i++) {
+		const struct memfd_luo_folio_ser *pfolio = &folios_ser[i];
+		phys_addr_t phys;
+		u64 index;
+		int flags;
+
+		if (!pfolio->pfn)
+			continue;
+
+		phys = PFN_PHYS(pfolio->pfn);
+		folio = kho_restore_folio(phys);
+		if (!folio) {
+			pr_err("Unable to restore folio at physical address: %llx\n",
+			       phys);
+			goto put_folios;
+		}
+		index = pfolio->index;
+		flags = pfolio->flags;
+
+		/* Set up the folio for insertion. */
+		__folio_set_locked(folio);
+		__folio_set_swapbacked(folio);
+
+		err = mem_cgroup_charge(folio, NULL, mapping_gfp_mask(mapping));
+		if (err) {
+			pr_err("shmem: failed to charge folio index %ld: %d\n",
+			       i, err);
+			goto unlock_folio;
+		}
+
+		err = shmem_add_to_page_cache(folio, mapping, index, NULL,
+					      mapping_gfp_mask(mapping));
+		if (err) {
+			pr_err("shmem: failed to add to page cache folio index %ld: %d\n",
+			       i, err);
+			goto unlock_folio;
+		}
+
+		if (flags & MEMFD_LUO_FOLIO_UPTODATE)
+			folio_mark_uptodate(folio);
+		if (flags & MEMFD_LUO_FOLIO_DIRTY)
+			folio_mark_dirty(folio);
+
+		err = shmem_inode_acct_blocks(inode, 1);
+		if (err) {
+			pr_err("shmem: failed to account folio index %ld: %d\n",
+			       i, err);
+			goto unlock_folio;
+		}
+
+		shmem_recalc_inode(inode, 1, 0);
+		folio_add_lru(folio);
+		folio_unlock(folio);
+		folio_put(folio);
+	}
+
+	return 0;
+
+unlock_folio:
+	folio_unlock(folio);
+	folio_put(folio);
+put_folios:
+	/*
+	 * Note: don't free the folios already added to the file. They will be
+	 * freed when the file is freed. Free the ones not added yet here.
+	 */
+	for (long j = i + 1; j < nr_folios; j++) {
+		const struct memfd_luo_folio_ser *pfolio = &folios_ser[j];
+
+		if (!pfolio->pfn)
+			continue;
+
+		folio = kho_restore_folio(PFN_PHYS(pfolio->pfn));
+		if (folio)
+			folio_put(folio);
+	}
+
+	return err;
+}
+
+static int memfd_luo_retrieve(struct liveupdate_file_op_args *args)
+{
+	struct memfd_luo_folio_ser *folios_ser;
+	struct memfd_luo_ser *ser;
+	struct file *file;
+	int err;
+
+	ser = phys_to_virt(args->serialized_data);
+	if (!ser)
+		return -EINVAL;
+
+	file = shmem_file_setup("", 0, VM_NORESERVE);
+
+	if (IS_ERR(file)) {
+		pr_err("failed to setup file: %pe\n", file);
+		return PTR_ERR(file);
+	}
+
+	vfs_setpos(file, ser->pos, MAX_LFS_FILESIZE);
+	file->f_inode->i_size = ser->size;
+
+	if (ser->nr_folios) {
+		folios_ser = kho_restore_vmalloc(&ser->folios);
+		if (!folios_ser) {
+			err = -EINVAL;
+			goto put_file;
+		}
+
+		err = memfd_luo_retrieve_folios(file, folios_ser, ser->nr_folios);
+		vfree(folios_ser);
+		if (err)
+			goto put_file;
+	}
+
+	args->file = file;
+	kho_restore_free(ser);
+
+	return 0;
+
+put_file:
+	fput(file);
+
+	return err;
+}
+
+static bool memfd_luo_can_preserve(struct liveupdate_file_handler *handler,
+				   struct file *file)
+{
+	struct inode *inode = file_inode(file);
+
+	return shmem_file(file) && !inode->i_nlink;
+}
+
+static const struct liveupdate_file_ops memfd_luo_file_ops = {
+	.freeze = memfd_luo_freeze,
+	.finish = memfd_luo_finish,
+	.retrieve = memfd_luo_retrieve,
+	.preserve = memfd_luo_preserve,
+	.unpreserve = memfd_luo_unpreserve,
+	.can_preserve = memfd_luo_can_preserve,
+	.owner = THIS_MODULE,
+};
+
+static struct liveupdate_file_handler memfd_luo_handler = {
+	.ops = &memfd_luo_file_ops,
+	.compatible = MEMFD_LUO_FH_COMPATIBLE,
+};
+
+static int __init memfd_luo_init(void)
+{
+	int err = liveupdate_register_file_handler(&memfd_luo_handler);
+
+	if (err && err != -EOPNOTSUPP) {
+		pr_err("Could not register luo filesystem handler: %pe\n",
+		       ERR_PTR(err));
+
+		return err;
+	}
+
+	return 0;
+}
+late_initcall(memfd_luo_init);
diff --git a/mm/shmem.c b/mm/shmem.c
index d578d8e765d7..3f194c9842a8 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -174,20 +174,20 @@ static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
  */
 static inline int shmem_acct_size(unsigned long flags, loff_t size)
 {
-	return (flags & VM_NORESERVE) ?
+	return (flags & SHMEM_F_NORESERVE) ?
 		0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
 }
 
 static inline void shmem_unacct_size(unsigned long flags, loff_t size)
 {
-	if (!(flags & VM_NORESERVE))
+	if (!(flags & SHMEM_F_NORESERVE))
 		vm_unacct_memory(VM_ACCT(size));
 }
 
 static inline int shmem_reacct_size(unsigned long flags,
 				    loff_t oldsize, loff_t newsize)
 {
-	if (!(flags & VM_NORESERVE)) {
+	if (!(flags & SHMEM_F_NORESERVE)) {
 		if (VM_ACCT(newsize) > VM_ACCT(oldsize))
 			return security_vm_enough_memory_mm(current->mm,
 					VM_ACCT(newsize) - VM_ACCT(oldsize));
@@ -205,7 +205,7 @@ static inline int shmem_reacct_size(unsigned long flags,
  */
 static inline int shmem_acct_blocks(unsigned long flags, long pages)
 {
-	if (!(flags & VM_NORESERVE))
+	if (!(flags & SHMEM_F_NORESERVE))
 		return 0;
 
 	return security_vm_enough_memory_mm(current->mm,
@@ -214,11 +214,11 @@ static inline int shmem_acct_blocks(unsigned long flags, long pages)
 
 static inline void shmem_unacct_blocks(unsigned long flags, long pages)
 {
-	if (flags & VM_NORESERVE)
+	if (flags & SHMEM_F_NORESERVE)
 		vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
 }
 
-static int shmem_inode_acct_blocks(struct inode *inode, long pages)
+int shmem_inode_acct_blocks(struct inode *inode, long pages)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
@@ -434,7 +434,7 @@ static void shmem_free_inode(struct super_block *sb, size_t freed_ispace)
  *
  * Return: true if swapped was incremented from 0, for shmem_writeout().
  */
-static bool shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
+bool shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	bool first_swapped = false;
@@ -878,9 +878,9 @@ static void shmem_update_stats(struct folio *folio, int nr_pages)
 /*
  * Somewhat like filemap_add_folio, but error if expected item has gone.
  */
-static int shmem_add_to_page_cache(struct folio *folio,
-				   struct address_space *mapping,
-				   pgoff_t index, void *expected, gfp_t gfp)
+int shmem_add_to_page_cache(struct folio *folio,
+			    struct address_space *mapping,
+			    pgoff_t index, void *expected, gfp_t gfp)
 {
 	XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
 	unsigned long nr = folio_nr_pages(folio);
@@ -1314,6 +1314,8 @@ static int shmem_setattr(struct mnt_idmap *idmap,
 			return -EPERM;
 
 		if (newsize != oldsize) {
+			if (info->flags & SHMEM_F_MAPPING_FROZEN)
+				return -EPERM;
 			error = shmem_reacct_size(SHMEM_I(inode)->flags,
 					oldsize, newsize);
 			if (error)
@@ -1568,7 +1570,7 @@ int shmem_writeout(struct folio *folio, struct swap_iocb **plug,
 	int nr_pages;
 	bool split = false;
 
-	if ((info->flags & VM_LOCKED) || sbinfo->noswap)
+	if ((info->flags & SHMEM_F_LOCKED) || sbinfo->noswap)
 		goto redirty;
 
 	if (!total_swap_pages)
@@ -2926,15 +2928,15 @@ int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
 	 * ipc_lock_object() when called from shmctl_do_lock(),
 	 * no serialization needed when called from shm_destroy().
 	 */
-	if (lock && !(info->flags & VM_LOCKED)) {
+	if (lock && !(info->flags & SHMEM_F_LOCKED)) {
 		if (!user_shm_lock(inode->i_size, ucounts))
 			goto out_nomem;
-		info->flags |= VM_LOCKED;
+		info->flags |= SHMEM_F_LOCKED;
 		mapping_set_unevictable(file->f_mapping);
 	}
-	if (!lock && (info->flags & VM_LOCKED) && ucounts) {
+	if (!lock && (info->flags & SHMEM_F_LOCKED) && ucounts) {
 		user_shm_unlock(inode->i_size, ucounts);
-		info->flags &= ~VM_LOCKED;
+		info->flags &= ~SHMEM_F_LOCKED;
 		mapping_clear_unevictable(file->f_mapping);
 	}
 	retval = 0;
@@ -3079,7 +3081,7 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
 		spin_lock_init(&info->lock);
 		atomic_set(&info->stop_eviction, 0);
 		info->seals = F_SEAL_SEAL;
-		info->flags = flags & VM_NORESERVE;
+		info->flags = (flags & VM_NORESERVE) ? SHMEM_F_NORESERVE : 0;
 		info->i_crtime = inode_get_mtime(inode);
 		info->fsflags = (dir == NULL) ? 0 :
 			SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
@@ -3306,6 +3308,10 @@ shmem_write_begin(const struct kiocb *iocb, struct address_space *mapping,
 			return -EPERM;
 	}
 
+	if (unlikely((info->flags & SHMEM_F_MAPPING_FROZEN) &&
+		     pos + len > inode->i_size))
+		return -EPERM;
+
 	ret = shmem_get_folio(inode, index, pos + len, &folio, SGP_WRITE);
 	if (ret)
 		return ret;
@@ -3679,6 +3685,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 
 	inode_lock(inode);
 
+	if (info->flags & SHMEM_F_MAPPING_FROZEN) {
+		error = -EPERM;
+		goto out;
+	}
+
 	if (mode & FALLOC_FL_PUNCH_HOLE) {
 		struct address_space *mapping = file->f_mapping;
 		loff_t unmap_start = round_up(offset, PAGE_SIZE);
@@ -5799,8 +5810,10 @@ static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
 /* common code */
 
 static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
-		loff_t size, unsigned long flags, unsigned int i_flags)
+		loff_t size, unsigned long vm_flags,
+		unsigned int i_flags)
 {
+	unsigned long flags = (vm_flags & VM_NORESERVE) ? SHMEM_F_NORESERVE : 0;
 	struct inode *inode;
 	struct file *res;
 
@@ -5817,7 +5830,7 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
 		return ERR_PTR(-ENOMEM);
 
 	inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
-				S_IFREG | S_IRWXUGO, 0, flags);
+				S_IFREG | S_IRWXUGO, 0, vm_flags);
 	if (IS_ERR(inode)) {
 		shmem_unacct_size(flags, size);
 		return ERR_CAST(inode);
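
For readers unfamiliar with KHO: prepare_kho_fdt() above hands the next kernel a small flattened device tree describing every named reserve_mem region. The fdt_property() calls that fill in each region node fall outside the visible hunk context, so the sketch below is an assumption about their shape, not taken from this patch; the "start" and "size" property names in particular are guesses::

	/*
	 * Hypothetical sketch of what one region node in the "memblock" KHO
	 * subtree might contain, using the libfdt sequential-write API that
	 * prepare_kho_fdt() already uses for the root node.
	 */
	static int __init sketch_emit_region(void *fdt, struct reserve_mem_table *map)
	{
		int err = 0;

		err |= fdt_begin_node(fdt, map->name);
		err |= fdt_property_string(fdt, "compatible",
					   RESERVE_MEM_KHO_NODE_COMPATIBLE);
		/* Assumed property names; the real ones are in the elided context. */
		err |= fdt_property(fdt, "start", &map->start, sizeof(map->start));
		err |= fdt_property(fdt, "size", &map->size, sizeof(map->size));
		err |= fdt_end_node(fdt);

		return err;
	}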
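
The DOC comment in memfd_luo.c tells userspace to re-apply ``FD_CLOEXEC`` and file seals itself after restore. A minimal userspace sketch of that fixup, assuming the restored descriptor has already been obtained through the LUO retrieve interface (which is outside this patch), and that the application previously held grow/shrink seals::

	#define _GNU_SOURCE
	#include <fcntl.h>

	/* Re-apply the properties that preservation resets to default. */
	static int memfd_restore_fixup(int fd)
	{
		int fdflags = fcntl(fd, F_GETFD);

		/* FD_CLOEXEC is not preserved; set it again if it was in use. */
		if (fdflags < 0 || fcntl(fd, F_SETFD, fdflags | FD_CLOEXEC) < 0)
			return -1;

		/* Seals are not preserved either; re-add the ones held before kexec. */
		if (fcntl(fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK) < 0)
			return -1;

		return 0;
	}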
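
One consequence of the memfd_pin_folios() behaviour documented in memfd_luo_preserve_folios() is that preservation materializes every hole. The userspace sketch below, using only standard memfd APIs, builds a 1 GiB memfd backed by a single page; preserving it would allocate folios for the full 1 GiB::

	#define _GNU_SOURCE
	#include <sys/mman.h>
	#include <unistd.h>

	/* A sparse memfd: 1 GiB of size, one page of actual backing. */
	static int make_sparse_memfd(void)
	{
		int fd = memfd_create("sparse", 0);

		if (fd < 0)
			return -1;
		/* All holes: no pages are allocated by the resize itself... */
		if (ftruncate(fd, 1UL << 30) < 0)
			return -1;
		/* ...except the single page this write touches. */
		if (write(fd, "x", 1) != 1)
			return -1;
		/*
		 * Preservation pins every index from 0 to (1 GiB / PAGE_SIZE) - 1,
		 * allocating a folio for each hole along the way.
		 */
		return fd;
	}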
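
The per-folio record that travels across the kexec is struct memfd_luo_folio_ser, defined in <linux/kho/abi/memfd.h>, which is not part of this diff. Inferred purely from how memfd_luo_preserve_folios() fills it in, the record presumably carries at least the following; the field names match the accesses above, but the widths and ordering are guesses::

	/* Inferred sketch only -- the authoritative layout is in the ABI header. */
	struct memfd_luo_folio_ser {
		u64 pfn;	/* physical frame the folio was preserved at */
		u64 index;	/* page cache index to reinsert it at on retrieve */
		u32 flags;	/* MEMFD_LUO_FOLIO_DIRTY, MEMFD_LUO_FOLIO_UPTODATE */
	};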
