From be5498cac2ddb112c5bd7433d5e834a1a2493427 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Jun 2024 23:58:44 -0400 Subject: remove pointless includes of <linux/fdtable.h> some of those used to be needed, some had been cargo-culted for no reason... Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- kernel/module/dups.c | 1 - kernel/module/kmod.c | 1 - 2 files changed, 2 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/dups.c b/kernel/module/dups.c index 9a92f2f8c9d3..bd2149fbe117 100644 --- a/kernel/module/dups.c +++ b/kernel/module/dups.c @@ -18,7 +18,6 @@ #include #include #include -#include <linux/fdtable.h> #include #include #include diff --git a/kernel/module/kmod.c b/kernel/module/kmod.c index 0800d9891692..25f253812512 100644 --- a/kernel/module/kmod.c +++ b/kernel/module/kmod.c @@ -15,7 +15,6 @@ #include #include #include -#include <linux/fdtable.h> #include #include #include -- cgit v1.2.3 From f43922162184f2bd54d87c1b5e97cf72d0dd1290 Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Tue, 15 Oct 2024 23:16:35 +0000 Subject: module: Take const arg in validate_section_offset `validate_section_offset` doesn't modify the info passed in. Make this clear by adjusting the type signature. 
Signed-off-by: Matthew Maurer Reviewed-by: Sami Tolvanen Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index 49b9bca9de12..1a2dd52147ba 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1645,7 +1645,7 @@ bool __weak module_exit_section(const char *name) return strstarts(name, ".exit"); } -static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr) +static int validate_section_offset(const struct load_info *info, Elf_Shdr *shdr) { #if defined(CONFIG_64BIT) unsigned long long secend; -- cgit v1.2.3 From 90f8f312db720dbabec7c6258ef580b50129cc21 Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Tue, 15 Oct 2024 23:16:36 +0000 Subject: module: Factor out elf_validity_ehdr Factor out verification of the ELF header and document what is checked. Signed-off-by: Matthew Maurer Reviewed-by: Sami Tolvanen Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 70 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 23 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index 1a2dd52147ba..59c977acfb44 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1664,6 +1664,50 @@ static int validate_section_offset(const struct load_info *info, Elf_Shdr *shdr) return 0; } +/** + * elf_validity_ehdr() - Checks an ELF header for module validity + * @info: Load info containing the ELF header to check + * + * Checks whether an ELF header could belong to a valid module. Checks: + * + * * ELF header is within the data the user provided + * * ELF magic is present + * * It is relocatable (not final linked, not core file, etc.) + * * The header's machine type matches what the architecture expects. 
+ * * Optional arch-specific hook for other properties + * - module_elf_check_arch() is currently only used by PPC to check + * ELF ABI version, but may be used by others in the future. + * + * Return: %0 if valid, %-ENOEXEC on failure. + */ +static int elf_validity_ehdr(const struct load_info *info) +{ + if (info->len < sizeof(*(info->hdr))) { + pr_err("Invalid ELF header len %lu\n", info->len); + return -ENOEXEC; + } + if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0) { + pr_err("Invalid ELF header magic: != %s\n", ELFMAG); + return -ENOEXEC; + } + if (info->hdr->e_type != ET_REL) { + pr_err("Invalid ELF header type: %u != %u\n", + info->hdr->e_type, ET_REL); + return -ENOEXEC; + } + if (!elf_check_arch(info->hdr)) { + pr_err("Invalid architecture in ELF header: %u\n", + info->hdr->e_machine); + return -ENOEXEC; + } + if (!module_elf_check_arch(info->hdr)) { + pr_err("Invalid module architecture in ELF header: %u\n", + info->hdr->e_machine); + return -ENOEXEC; + } + return 0; +} + /* * Check userspace passed ELF module against our expectations, and cache * useful variables for further processing as we go. 
@@ -1693,30 +1737,10 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) unsigned int num_info_secs = 0, info_idx; unsigned int num_sym_secs = 0, sym_idx; - if (info->len < sizeof(*(info->hdr))) { - pr_err("Invalid ELF header len %lu\n", info->len); - goto no_exec; - } + err = elf_validity_ehdr(info); + if (err < 0) + return err; - if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0) { - pr_err("Invalid ELF header magic: != %s\n", ELFMAG); - goto no_exec; - } - if (info->hdr->e_type != ET_REL) { - pr_err("Invalid ELF header type: %u != %u\n", - info->hdr->e_type, ET_REL); - goto no_exec; - } - if (!elf_check_arch(info->hdr)) { - pr_err("Invalid architecture in ELF header: %u\n", - info->hdr->e_machine); - goto no_exec; - } - if (!module_elf_check_arch(info->hdr)) { - pr_err("Invalid module architecture in ELF header: %u\n", - info->hdr->e_machine); - goto no_exec; - } if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) { pr_err("Invalid ELF section header size\n"); goto no_exec; -- cgit v1.2.3 From c92aab819d56d51631f0484ed7af11d9d8ff4cb0 Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Tue, 15 Oct 2024 23:16:37 +0000 Subject: module: Factor out elf_validity_cache_sechdrs Factor out and document the validation of section headers. Because we now validate all section offsets and lengths before accessing them, we can remove the ad-hoc checks. 
Signed-off-by: Matthew Maurer Reviewed-by: Sami Tolvanen Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 125 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 82 insertions(+), 43 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index 59c977acfb44..1f3a07ee59c6 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1708,6 +1708,87 @@ static int elf_validity_ehdr(const struct load_info *info) return 0; } +/** + * elf_validity_cache_sechdrs() - Cache section headers if valid + * @info: Load info to compute section headers from + * + * Checks: + * + * * ELF header is valid (see elf_validity_ehdr()) + * * Section headers are the size we expect + * * Section array fits in the user provided data + * * Section index 0 is NULL + * * Section contents are inbounds + * + * Then updates @info with a &load_info->sechdrs pointer if valid. + * + * Return: %0 if valid, negative error code if validation failed. + */ +static int elf_validity_cache_sechdrs(struct load_info *info) +{ + Elf_Shdr *sechdrs; + Elf_Shdr *shdr; + int i; + int err; + + err = elf_validity_ehdr(info); + if (err < 0) + return err; + + if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) { + pr_err("Invalid ELF section header size\n"); + return -ENOEXEC; + } + + /* + * e_shnum is 16 bits, and sizeof(Elf_Shdr) is + * known and small. So e_shnum * sizeof(Elf_Shdr) + * will not overflow unsigned long on any platform. + */ + if (info->hdr->e_shoff >= info->len + || (info->hdr->e_shnum * sizeof(Elf_Shdr) > + info->len - info->hdr->e_shoff)) { + pr_err("Invalid ELF section header overflow\n"); + return -ENOEXEC; + } + + sechdrs = (void *)info->hdr + info->hdr->e_shoff; + + /* + * The code assumes that section 0 has a length of zero and + * an addr of zero, so check for it. 
+ */ + if (sechdrs[0].sh_type != SHT_NULL + || sechdrs[0].sh_size != 0 + || sechdrs[0].sh_addr != 0) { + pr_err("ELF Spec violation: section 0 type(%d)!=SH_NULL or non-zero len or addr\n", + sechdrs[0].sh_type); + return -ENOEXEC; + } + + /* Validate contents are inbounds */ + for (i = 1; i < info->hdr->e_shnum; i++) { + shdr = &sechdrs[i]; + switch (shdr->sh_type) { + case SHT_NULL: + case SHT_NOBITS: + /* No contents, offset/size don't mean anything */ + continue; + default: + err = validate_section_offset(info, shdr); + if (err < 0) { + pr_err("Invalid ELF section in module (section %u type %u)\n", + i, shdr->sh_type); + return err; + } + } + } + + info->sechdrs = sechdrs; + + return 0; +} + /* * Check userspace passed ELF module against our expectations, and cache * useful variables for further processing as we go. @@ -1737,29 +1818,10 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) unsigned int num_info_secs = 0, info_idx; unsigned int num_sym_secs = 0, sym_idx; - err = elf_validity_ehdr(info); + err = elf_validity_cache_sechdrs(info); if (err < 0) return err; - if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) { - pr_err("Invalid ELF section header size\n"); - goto no_exec; - } - - /* - * e_shnum is 16 bits, and sizeof(Elf_Shdr) is - * known and small. So e_shnum * sizeof(Elf_Shdr) - * will not overflow unsigned long on any platform. - */ - if (info->hdr->e_shoff >= info->len - || (info->hdr->e_shnum * sizeof(Elf_Shdr) > - info->len - info->hdr->e_shoff)) { - pr_err("Invalid ELF section header overflow\n"); - goto no_exec; - } - - info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; - /* * Verify if the section name table index is valid. 
*/ @@ -1772,11 +1834,6 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) } strhdr = &info->sechdrs[info->hdr->e_shstrndx]; - err = validate_section_offset(info, strhdr); - if (err < 0) { - pr_err("Invalid ELF section hdr(type %u)\n", strhdr->sh_type); - return err; - } /* * The section name table must be NUL-terminated, as required @@ -1793,18 +1850,6 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) goto no_exec; } - /* - * The code assumes that section 0 has a length of zero and - * an addr of zero, so check for it. - */ - if (info->sechdrs[0].sh_type != SHT_NULL - || info->sechdrs[0].sh_size != 0 - || info->sechdrs[0].sh_addr != 0) { - pr_err("ELF Spec violation: section 0 type(%d)!=SH_NULL or non-zero len or addr\n", - info->sechdrs[0].sh_type); - goto no_exec; - } - for (i = 1; i < info->hdr->e_shnum; i++) { shdr = &info->sechdrs[i]; switch (shdr->sh_type) { @@ -1823,12 +1868,6 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) sym_idx = i; fallthrough; default: - err = validate_section_offset(info, shdr); - if (err < 0) { - pr_err("Invalid ELF section in module (section %u type %u)\n", - i, shdr->sh_type); - return err; - } if (strcmp(info->secstrings + shdr->sh_name, ".gnu.linkonce.this_module") == 0) { num_mod_secs++; -- cgit v1.2.3 From 3c5700aeabd87e81d9153a7666b28d0e405c6c88 Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Tue, 15 Oct 2024 23:16:38 +0000 Subject: module: Factor out elf_validity_cache_secstrings Factor out the validation of section names. There are two behavioral changes: 1. Previously, we did not validate non-SHF_ALLOC sections. This may have once been safe, as find_sec skips non-SHF_ALLOC sections, but find_any_sec, which will be used to load BTF if that is enabled, ignores the SHF_ALLOC flag. Since there's no need to support invalid section names, validate all of them, not just SHF_ALLOC sections. 2. 
Section names were validated *after* accessing them for the purposes of detecting ".modinfo" and ".gnu.linkonce.this_module". They are now checked prior to the access, which could avoid bad accesses with malformed modules. Signed-off-by: Matthew Maurer Reviewed-by: Sami Tolvanen Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 106 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 37 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index 1f3a07ee59c6..6a9159afca02 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1789,6 +1789,71 @@ static int elf_validity_cache_sechdrs(struct load_info *info) return 0; } +/** + * elf_validity_cache_secstrings() - Caches section names if valid + * @info: Load info to cache section names from. Must have valid sechdrs. + * + * Specifically checks: + * + * * Section name table index is inbounds of section headers + * * Section name table is not empty + * * Section name table is NUL terminated + * * All section name offsets are inbounds of the section + * + * Then updates @info with a &load_info->secstrings pointer if valid. + * + * Return: %0 if valid, negative error code if validation failed. + */ +static int elf_validity_cache_secstrings(struct load_info *info) +{ + Elf_Shdr *strhdr, *shdr; + char *secstrings; + int i; + + /* + * Verify if the section name table index is valid. + */ + if (info->hdr->e_shstrndx == SHN_UNDEF + || info->hdr->e_shstrndx >= info->hdr->e_shnum) { + pr_err("Invalid ELF section name index: %d || e_shstrndx (%d) >= e_shnum (%d)\n", + info->hdr->e_shstrndx, info->hdr->e_shstrndx, + info->hdr->e_shnum); + return -ENOEXEC; + } + + strhdr = &info->sechdrs[info->hdr->e_shstrndx]; + + /* + * The section name table must be NUL-terminated, as required + * by the spec. This makes strcmp and pr_* calls that access + * strings in the section safe. 
+ */ + secstrings = (void *)info->hdr + strhdr->sh_offset; + if (strhdr->sh_size == 0) { + pr_err("empty section name table\n"); + return -ENOEXEC; + } + if (secstrings[strhdr->sh_size - 1] != '\0') { + pr_err("ELF Spec violation: section name table isn't null terminated\n"); + return -ENOEXEC; + } + + for (i = 0; i < info->hdr->e_shnum; i++) { + shdr = &info->sechdrs[i]; + /* SHT_NULL means sh_name has an undefined value */ + if (shdr->sh_type == SHT_NULL) + continue; + if (shdr->sh_name >= strhdr->sh_size) { + pr_err("Invalid ELF section name in module (section %u type %u)\n", + i, shdr->sh_type); + return -ENOEXEC; + } + } + + info->secstrings = secstrings; + return 0; +} + /* * Check userspace passed ELF module against our expectations, and cache * useful variables for further processing as we go. @@ -1812,7 +1877,7 @@ static int elf_validity_cache_sechdrs(struct load_info *info) static int elf_validity_cache_copy(struct load_info *info, int flags) { unsigned int i; - Elf_Shdr *shdr, *strhdr; + Elf_Shdr *shdr; int err; unsigned int num_mod_secs = 0, mod_idx; unsigned int num_info_secs = 0, info_idx; @@ -1821,34 +1886,9 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) err = elf_validity_cache_sechdrs(info); if (err < 0) return err; - - /* - * Verify if the section name table index is valid. - */ - if (info->hdr->e_shstrndx == SHN_UNDEF - || info->hdr->e_shstrndx >= info->hdr->e_shnum) { - pr_err("Invalid ELF section name index: %d || e_shstrndx (%d) >= e_shnum (%d)\n", - info->hdr->e_shstrndx, info->hdr->e_shstrndx, - info->hdr->e_shnum); - goto no_exec; - } - - strhdr = &info->sechdrs[info->hdr->e_shstrndx]; - - /* - * The section name table must be NUL-terminated, as required - * by the spec. This makes strcmp and pr_* calls that access - * strings in the section safe. 
- */ - info->secstrings = (void *)info->hdr + strhdr->sh_offset; - if (strhdr->sh_size == 0) { - pr_err("empty section name table\n"); - goto no_exec; - } - if (info->secstrings[strhdr->sh_size - 1] != '\0') { - pr_err("ELF Spec violation: section name table isn't null terminated\n"); - goto no_exec; - } + err = elf_validity_cache_secstrings(info); + if (err < 0) + return err; for (i = 1; i < info->hdr->e_shnum; i++) { shdr = &info->sechdrs[i]; @@ -1877,14 +1917,6 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) num_info_secs++; info_idx = i; } - - if (shdr->sh_flags & SHF_ALLOC) { - if (shdr->sh_name >= strhdr->sh_size) { - pr_err("Invalid ELF section name in module (section %u type %u)\n", - i, shdr->sh_type); - return -ENOEXEC; - } - } break; } } -- cgit v1.2.3 From fbc0e4e482aac7f2b0d20f0a11f6d5eeda346fda Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Tue, 15 Oct 2024 23:16:39 +0000 Subject: module: Factor out elf_validity_cache_index_info Centralize .modinfo detection and property validation. Signed-off-by: Matthew Maurer Reviewed-by: Sami Tolvanen Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 82 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 14 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index 6a9159afca02..511d645ac577 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -195,6 +195,38 @@ static unsigned int find_sec(const struct load_info *info, const char *name) return 0; } +/** + * find_any_unique_sec() - Find a unique section index by name + * @info: Load info for the module to scan + * @name: Name of the section we're looking for + * + * Locates a unique section by name. Ignores SHF_ALLOC. + * + * Return: Section index if found uniquely, zero if absent, negative count + * of total instances if multiple were found. 
+ */ +static int find_any_unique_sec(const struct load_info *info, const char *name) +{ + unsigned int idx; + unsigned int count = 0; + int i; + + for (i = 1; i < info->hdr->e_shnum; i++) { + if (strcmp(info->secstrings + info->sechdrs[i].sh_name, + name) == 0) { + count++; + idx = i; + } + } + if (count == 1) { + return idx; + } else if (count == 0) { + return 0; + } else { + return -count; + } +} + /* Find a module section, or NULL. */ static void *section_addr(const struct load_info *info, const char *name) { @@ -1854,6 +1886,39 @@ static int elf_validity_cache_secstrings(struct load_info *info) return 0; } +/** + * elf_validity_cache_index_info() - Validate and cache modinfo section + * @info: Load info to populate the modinfo index on. + * Must have &load_info->sechdrs and &load_info->secstrings populated + * + * Checks that if there is a .modinfo section, it is unique. + * Then, it caches its index in &load_info->index.info. + * Finally, it tries to populate the name to improve error messages. + * + * Return: %0 if valid, %-ENOEXEC if multiple modinfo sections were found. + */ +static int elf_validity_cache_index_info(struct load_info *info) +{ + int info_idx; + + info_idx = find_any_unique_sec(info, ".modinfo"); + + if (info_idx == 0) + /* Early return, no .modinfo */ + return 0; + + if (info_idx < 0) { + pr_err("Only one .modinfo section must exist.\n"); + return -ENOEXEC; + } + + info->index.info = info_idx; + /* Try to find a name early so we can log errors with a module name */ + info->name = get_modinfo(info, "name"); + + return 0; +} + /* * Check userspace passed ELF module against our expectations, and cache * useful variables for further processing as we go. 
@@ -1880,13 +1945,15 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) Elf_Shdr *shdr; int err; unsigned int num_mod_secs = 0, mod_idx; - unsigned int num_info_secs = 0, info_idx; unsigned int num_sym_secs = 0, sym_idx; err = elf_validity_cache_sechdrs(info); if (err < 0) return err; err = elf_validity_cache_secstrings(info); + if (err < 0) + return err; + err = elf_validity_cache_index_info(info); if (err < 0) return err; @@ -1912,24 +1979,11 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) ".gnu.linkonce.this_module") == 0) { num_mod_secs++; mod_idx = i; - } else if (strcmp(info->secstrings + shdr->sh_name, - ".modinfo") == 0) { - num_info_secs++; - info_idx = i; } break; } } - if (num_info_secs > 1) { - pr_err("Only one .modinfo section must exist.\n"); - goto no_exec; - } else if (num_info_secs == 1) { - /* Try to find a name early so we can log errors with a module name */ - info->index.info = info_idx; - info->name = get_modinfo(info, "name"); - } - if (num_sym_secs != 1) { pr_warn("%s: module has no symbols (stripped?)\n", info->name ?: "(missing .modinfo section or name field)"); -- cgit v1.2.3 From 0be41a9367d1fbb16b4b57d81082341af114bad7 Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Tue, 15 Oct 2024 23:16:40 +0000 Subject: module: Factor out elf_validity_cache_index_mod Centralize .gnu.linkonce.this_module detection and property validation. 
Signed-off-by: Matthew Maurer Reviewed-by: Sami Tolvanen Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 129 ++++++++++++++++++++++++++------------------------- 1 file changed, 67 insertions(+), 62 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index 511d645ac577..ec638187ffcf 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1919,6 +1919,68 @@ static int elf_validity_cache_index_info(struct load_info *info) return 0; } +/** + * elf_validity_cache_index_mod() - Validates and caches this_module section + * @info: Load info to cache this_module on. + * Must have &load_info->sechdrs and &load_info->secstrings populated + * + * The ".gnu.linkonce.this_module" ELF section is special. It is what modpost + * uses to refer to __this_module and lets us rely on THIS_MODULE to point + * to &__this_module properly. The kernel's modpost declares it on each + * module's *.mod.c file. If the struct module of the kernel changes, a full + * kernel rebuild is required. 
+ * + * We have a few expectations for this special section, this function + * validates all this for us: + * + * * The section has contents + * * The section is unique + * * We expect the kernel to always have to allocate it: SHF_ALLOC + * * The section size must match the kernel's run time's struct module + * size + * + * If all checks pass, the index will be cached in &load_info->index.mod + * + * Return: %0 on validation success, %-ENOEXEC on failure + */ +static int elf_validity_cache_index_mod(struct load_info *info) +{ + Elf_Shdr *shdr; + int mod_idx; + + mod_idx = find_any_unique_sec(info, ".gnu.linkonce.this_module"); + if (mod_idx <= 0) { + pr_err("module %s: Exactly one .gnu.linkonce.this_module section must exist.\n", + info->name ?: "(missing .modinfo section or name field)"); + return -ENOEXEC; + } + + shdr = &info->sechdrs[mod_idx]; + + if (shdr->sh_type == SHT_NOBITS) { + pr_err("module %s: .gnu.linkonce.this_module section must have a size set\n", + info->name ?: "(missing .modinfo section or name field)"); + return -ENOEXEC; + } + + if (!(shdr->sh_flags & SHF_ALLOC)) { + pr_err("module %s: .gnu.linkonce.this_module must occupy memory during process execution\n", + info->name ?: "(missing .modinfo section or name field)"); + return -ENOEXEC; + } + + if (shdr->sh_size != sizeof(struct module)) { + pr_err("module %s: .gnu.linkonce.this_module section size must match the kernel's built struct module size at run time\n", + info->name ?: "(missing .modinfo section or name field)"); + return -ENOEXEC; + } + + info->index.mod = mod_idx; + + return 0; +} + + /* * Check userspace passed ELF module against our expectations, and cache * useful variables for further processing as we go. 
@@ -1944,7 +2006,6 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) unsigned int i; Elf_Shdr *shdr; int err; - unsigned int num_mod_secs = 0, mod_idx; unsigned int num_sym_secs = 0, sym_idx; err = elf_validity_cache_sechdrs(info); @@ -1954,16 +2015,15 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) if (err < 0) return err; err = elf_validity_cache_index_info(info); + if (err < 0) + return err; + err = elf_validity_cache_index_mod(info); if (err < 0) return err; for (i = 1; i < info->hdr->e_shnum; i++) { shdr = &info->sechdrs[i]; - switch (shdr->sh_type) { - case SHT_NULL: - case SHT_NOBITS: - continue; - case SHT_SYMTAB: + if (shdr->sh_type == SHT_SYMTAB) { if (shdr->sh_link == SHN_UNDEF || shdr->sh_link >= info->hdr->e_shnum) { pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n", @@ -1973,14 +2033,6 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) } num_sym_secs++; sym_idx = i; - fallthrough; - default: - if (strcmp(info->secstrings + shdr->sh_name, - ".gnu.linkonce.this_module") == 0) { - num_mod_secs++; - mod_idx = i; - } - break; } } @@ -1996,55 +2048,8 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) info->index.str = shdr->sh_link; info->strtab = (char *)info->hdr + info->sechdrs[info->index.str].sh_offset; - /* - * The ".gnu.linkonce.this_module" ELF section is special. It is - * what modpost uses to refer to __this_module and let's use rely - * on THIS_MODULE to point to &__this_module properly. The kernel's - * modpost declares it on each modules's *.mod.c file. If the struct - * module of the kernel changes a full kernel rebuild is required. 
- * - * We have a few expectaions for this special section, the following - * code validates all this for us: - * - * o Only one section must exist - * o We expect the kernel to always have to allocate it: SHF_ALLOC - * o The section size must match the kernel's run time's struct module - * size - */ - if (num_mod_secs != 1) { - pr_err("module %s: Only one .gnu.linkonce.this_module section must exist.\n", - info->name ?: "(missing .modinfo section or name field)"); - goto no_exec; - } - - shdr = &info->sechdrs[mod_idx]; - - /* - * This is already implied on the switch above, however let's be - * pedantic about it. - */ - if (shdr->sh_type == SHT_NOBITS) { - pr_err("module %s: .gnu.linkonce.this_module section must have a size set\n", - info->name ?: "(missing .modinfo section or name field)"); - goto no_exec; - } - - if (!(shdr->sh_flags & SHF_ALLOC)) { - pr_err("module %s: .gnu.linkonce.this_module must occupy memory during process execution\n", - info->name ?: "(missing .modinfo section or name field)"); - goto no_exec; - } - - if (shdr->sh_size != sizeof(struct module)) { - pr_err("module %s: .gnu.linkonce.this_module section size must match the kernel's built struct module size at run time\n", - info->name ?: "(missing .modinfo section or name field)"); - goto no_exec; - } - - info->index.mod = mod_idx; - /* This is temporary: point mod into copy of data. */ - info->mod = (void *)info->hdr + shdr->sh_offset; + info->mod = (void *)info->hdr + info->sechdrs[info->index.mod].sh_offset; /* * If we didn't load the .modinfo 'name' field earlier, fall back to -- cgit v1.2.3 From 9bd4982cf7d65f4c9e0793d5a8fda6ad838e8554 Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Tue, 15 Oct 2024 23:16:41 +0000 Subject: module: Factor out elf_validity_cache_index_sym Centralize symbol table detection and property validation. 
Signed-off-by: Matthew Maurer Reviewed-by: Sami Tolvanen Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 73 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 29 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index ec638187ffcf..6be58b0a6468 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1980,6 +1980,39 @@ static int elf_validity_cache_index_mod(struct load_info *info) return 0; } +/** + * elf_validity_cache_index_sym() - Validate and cache symtab index + * @info: Load info to cache symtab index in. + * Must have &load_info->sechdrs and &load_info->secstrings populated. + * + * Checks that there is exactly one symbol table, then caches its index in + * &load_info->index.sym. + * + * Return: %0 if valid, %-ENOEXEC on failure. + */ +static int elf_validity_cache_index_sym(struct load_info *info) +{ + unsigned int sym_idx; + unsigned int num_sym_secs = 0; + int i; + + for (i = 1; i < info->hdr->e_shnum; i++) { + if (info->sechdrs[i].sh_type == SHT_SYMTAB) { + num_sym_secs++; + sym_idx = i; + } + } + + if (num_sym_secs != 1) { + pr_warn("%s: module has no symbols (stripped?)\n", + info->name ?: "(missing .modinfo section or name field)"); + return -ENOEXEC; + } + + info->index.sym = sym_idx; + + return 0; +} /* * Check userspace passed ELF module against our expectations, and cache @@ -2003,10 +2036,8 @@ static int elf_validity_cache_index_mod(struct load_info *info) */ static int elf_validity_cache_copy(struct load_info *info, int flags) { - unsigned int i; - Elf_Shdr *shdr; int err; - unsigned int num_sym_secs = 0, sym_idx; + int str_idx; err = elf_validity_cache_sechdrs(info); if (err < 0) @@ -2018,34 +2049,21 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) if (err < 0) return err; err = elf_validity_cache_index_mod(info); + if (err < 0) + return err; + err = elf_validity_cache_index_sym(info); if (err < 0) return err; - for 
(i = 1; i < info->hdr->e_shnum; i++) { - shdr = &info->sechdrs[i]; - if (shdr->sh_type == SHT_SYMTAB) { - if (shdr->sh_link == SHN_UNDEF - || shdr->sh_link >= info->hdr->e_shnum) { - pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n", - shdr->sh_link, shdr->sh_link, - info->hdr->e_shnum); - goto no_exec; - } - num_sym_secs++; - sym_idx = i; - } - } - - if (num_sym_secs != 1) { - pr_warn("%s: module has no symbols (stripped?)\n", - info->name ?: "(missing .modinfo section or name field)"); - goto no_exec; + str_idx = info->sechdrs[info->index.sym].sh_link; + if (str_idx == SHN_UNDEF || str_idx >= info->hdr->e_shnum) { + pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n", + str_idx, str_idx, info->hdr->e_shnum); + return -ENOEXEC; } - /* Sets internal symbols and strings. */ - info->index.sym = sym_idx; - shdr = &info->sechdrs[sym_idx]; - info->index.str = shdr->sh_link; + /* Sets internal strings. */ + info->index.str = str_idx; info->strtab = (char *)info->hdr + info->sechdrs[info->index.str].sh_offset; /* This is temporary: point mod into copy of data. */ @@ -2066,9 +2084,6 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) info->index.pcpu = find_pcpusec(info); return 0; - -no_exec: - return -ENOEXEC; } #define COPY_CHUNK_SIZE (16*PAGE_SIZE) -- cgit v1.2.3 From 0a9395334496d3be8bde491e46087540cb8f141d Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Tue, 15 Oct 2024 23:16:42 +0000 Subject: module: Factor out elf_validity_cache_index_str Pull out index validation for the symbol string section. Note that this does not validate the *contents* of the string table, only shape and presence of the section. 
Signed-off-by: Matthew Maurer Reviewed-by: Sami Tolvanen Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index 6be58b0a6468..43140475aac0 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2014,6 +2014,31 @@ static int elf_validity_cache_index_sym(struct load_info *info) return 0; } +/** + * elf_validity_cache_index_str() - Validate and cache strtab index + * @info: Load info to cache strtab index in. + * Must have &load_info->sechdrs and &load_info->secstrings populated. + * Must have &load_info->index.sym populated. + * + * Looks at the symbol table's associated string table, makes sure it is + * in-bounds, and caches it. + * + * Return: %0 if valid, %-ENOEXEC on failure. + */ +static int elf_validity_cache_index_str(struct load_info *info) +{ + unsigned int str_idx = info->sechdrs[info->index.sym].sh_link; + + if (str_idx == SHN_UNDEF || str_idx >= info->hdr->e_shnum) { + pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n", + str_idx, str_idx, info->hdr->e_shnum); + return -ENOEXEC; + } + + info->index.str = str_idx; + return 0; +} + /* * Check userspace passed ELF module against our expectations, and cache * useful variables for further processing as we go. 
@@ -2037,7 +2062,6 @@ static int elf_validity_cache_index_sym(struct load_info *info) static int elf_validity_cache_copy(struct load_info *info, int flags) { int err; - int str_idx; err = elf_validity_cache_sechdrs(info); if (err < 0) @@ -2054,16 +2078,11 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) err = elf_validity_cache_index_sym(info); if (err < 0) return err; - - str_idx = info->sechdrs[info->index.sym].sh_link; - if (str_idx == SHN_UNDEF || str_idx >= info->hdr->e_shnum) { - pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n", - str_idx, str_idx, info->hdr->e_shnum); - return -ENOEXEC; - } + err = elf_validity_cache_index_str(info); + if (err < 0) + return err; /* Sets internal strings. */ - info->index.str = str_idx; info->strtab = (char *)info->hdr + info->sechdrs[info->index.str].sh_offset; /* This is temporary: point mod into copy of data. */ -- cgit v1.2.3 From f3f561218bb60afd6d3e3b26add39ff46de89c83 Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Tue, 15 Oct 2024 23:16:43 +0000 Subject: module: Group section index calculations together Group all the index detection together to make the parent function easier to read. Signed-off-by: Matthew Maurer Reviewed-by: Sami Tolvanen Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 68 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 17 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index 43140475aac0..e04a228c694a 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2039,6 +2039,56 @@ static int elf_validity_cache_index_str(struct load_info *info) return 0; } +/** + * elf_validity_cache_index() - Resolve, validate, cache section indices + * @info: Load info to read from and update. + * &load_info->sechdrs and &load_info->secstrings must be populated. 
+ * @flags: Load flags, relevant to suppress version loading, see + * uapi/linux/module.h + * + * Populates &load_info->index, validating as it goes. + * See child functions for per-field validation: + * + * * elf_validity_cache_index_info() + * * elf_validity_cache_index_mod() + * * elf_validity_cache_index_sym() + * * elf_validity_cache_index_str() + * + * If versioning is not suppressed via flags, load the version index from + * a section called "__versions" with no validation. + * + * If CONFIG_SMP is enabled, load the percpu section by name with no + * validation. + * + * Return: 0 on success, negative error code if an index failed validation. + */ +static int elf_validity_cache_index(struct load_info *info, int flags) +{ + int err; + + err = elf_validity_cache_index_info(info); + if (err < 0) + return err; + err = elf_validity_cache_index_mod(info); + if (err < 0) + return err; + err = elf_validity_cache_index_sym(info); + if (err < 0) + return err; + err = elf_validity_cache_index_str(info); + if (err < 0) + return err; + + if (flags & MODULE_INIT_IGNORE_MODVERSIONS) + info->index.vers = 0; /* Pretend no __versions section! */ + else + info->index.vers = find_sec(info, "__versions"); + + info->index.pcpu = find_pcpusec(info); + + return 0; +} + /* * Check userspace passed ELF module against our expectations, and cache * useful variables for further processing as we go. 
@@ -2069,16 +2119,7 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) err = elf_validity_cache_secstrings(info); if (err < 0) return err; - err = elf_validity_cache_index_info(info); - if (err < 0) - return err; - err = elf_validity_cache_index_mod(info); - if (err < 0) - return err; - err = elf_validity_cache_index_sym(info); - if (err < 0) - return err; - err = elf_validity_cache_index_str(info); + err = elf_validity_cache_index(info, flags); if (err < 0) return err; @@ -2095,13 +2136,6 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) if (!info->name) info->name = info->mod->name; - if (flags & MODULE_INIT_IGNORE_MODVERSIONS) - info->index.vers = 0; /* Pretend no __versions section! */ - else - info->index.vers = find_sec(info, "__versions"); - - info->index.pcpu = find_pcpusec(info); - return 0; } -- cgit v1.2.3 From 837031e052af32c747906238fb1feb87778e4fe0 Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Tue, 15 Oct 2024 23:16:44 +0000 Subject: module: Factor out elf_validity_cache_strtab This patch only moves the existing strtab population to a function. Validation comes in a following patch, this is split out to make the new validation checks more clearly separated. Signed-off-by: Matthew Maurer Reviewed-by: Sami Tolvanen Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index e04a228c694a..c082d5d41a8d 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2089,6 +2089,23 @@ static int elf_validity_cache_index(struct load_info *info, int flags) return 0; } +/** + * elf_validity_cache_strtab() - Cache symbol string table + * @info: Load info to read from and update. + * Must have &load_info->sechdrs and &load_info->secstrings populated. + * Must have &load_info->index populated. 
+ * + * Return: 0 on success, negative error code if a check failed. + */ +static int elf_validity_cache_strtab(struct load_info *info) +{ + Elf_Shdr *str_shdr = &info->sechdrs[info->index.str]; + char *strtab = (char *)info->hdr + str_shdr->sh_offset; + + info->strtab = strtab; + return 0; +} + /* * Check userspace passed ELF module against our expectations, and cache * useful variables for further processing as we go. @@ -2122,9 +2139,9 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) err = elf_validity_cache_index(info, flags); if (err < 0) return err; - - /* Sets internal strings. */ - info->strtab = (char *)info->hdr + info->sechdrs[info->index.str].sh_offset; + err = elf_validity_cache_strtab(info); + if (err < 0) + return err; /* This is temporary: point mod into copy of data. */ info->mod = (void *)info->hdr + info->sechdrs[info->index.mod].sh_offset; -- cgit v1.2.3 From d979e3dffa93c9284f244ef64f7a68042c2f8b80 Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Tue, 15 Oct 2024 23:16:45 +0000 Subject: module: Additional validation in elf_validity_cache_strtab Validate properties of the strtab that are depended on elsewhere, but were previously unchecked: * String table nonempty (offset 0 is valid) * String table has a leading NUL (offset 0 corresponds to "") * String table is NUL terminated (strfoo functions won't run out of the table while reading). * All symbols names are inbounds of the string table. 
Signed-off-by: Matthew Maurer Reviewed-by: Sami Tolvanen Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index c082d5d41a8d..b40b632f00a6 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2090,17 +2090,53 @@ static int elf_validity_cache_index(struct load_info *info, int flags) } /** - * elf_validity_cache_strtab() - Cache symbol string table + * elf_validity_cache_strtab() - Validate and cache symbol string table * @info: Load info to read from and update. * Must have &load_info->sechdrs and &load_info->secstrings populated. * Must have &load_info->index populated. * + * Checks: + * + * * The string table is not empty. + * * The string table starts and ends with NUL (required by ELF spec). + * * Every &Elf_Sym->st_name offset in the symbol table is inbounds of the + * string table. + * + * And caches the pointer as &load_info->strtab in @info. + * * Return: 0 on success, negative error code if a check failed. */ static int elf_validity_cache_strtab(struct load_info *info) { Elf_Shdr *str_shdr = &info->sechdrs[info->index.str]; + Elf_Shdr *sym_shdr = &info->sechdrs[info->index.sym]; char *strtab = (char *)info->hdr + str_shdr->sh_offset; + Elf_Sym *syms = (void *)info->hdr + sym_shdr->sh_offset; + int i; + + if (str_shdr->sh_size == 0) { + pr_err("empty symbol string table\n"); + return -ENOEXEC; + } + if (strtab[0] != '\0') { + pr_err("symbol string table missing leading NUL\n"); + return -ENOEXEC; + } + if (strtab[str_shdr->sh_size - 1] != '\0') { + pr_err("symbol string table isn't NUL terminated\n"); + return -ENOEXEC; + } + + /* + * Now that we know strtab is correctly structured, check symbol + * starts are inbounds before they're used later. 
+ */ + for (i = 0; i < sym_shdr->sh_size / sizeof(*syms); i++) { + if (syms[i].st_name >= str_shdr->sh_size) { + pr_err("symbol name out of bounds in string table"); + return -ENOEXEC; + } + } info->strtab = strtab; return 0; -- cgit v1.2.3 From 2295cf87ed5a6da4564034e4f8ebcce0a0a021ed Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Tue, 15 Oct 2024 23:16:46 +0000 Subject: module: Reformat struct for code style Using commas to declare struct members makes adding new members to this struct not as nice with patch management. Test results linux-modules-kpd succeed [0]. Signed-off-by: Matthew Maurer Reviewed-by: Sami Tolvanen [mcgrof: add automated test results from kdevops using KPD ] Link: https://github.com/linux-kdevops/linux-modules-kpd/actions/runs/11420095343 # [0] Signed-off-by: Luis Chamberlain --- kernel/module/internal.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/module') diff --git a/kernel/module/internal.h b/kernel/module/internal.h index 2ebece8a789f..daef2be83902 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -80,7 +80,12 @@ struct load_info { unsigned int used_pages; #endif struct { - unsigned int sym, str, mod, vers, info, pcpu; + unsigned int sym; + unsigned int str; + unsigned int mod; + unsigned int vers; + unsigned int info; + unsigned int pcpu; } index; }; -- cgit v1.2.3 From 05e555642c4613d5a2438351c705bb2119352757 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Jul 2024 01:17:52 -0400 Subject: regularize emptiness checks in fini_module(2) and vfs_dedupe_file_range() With few exceptions emptiness checks are done as fd_file(...) in boolean context (usually something like if (!fd_file(f))...); those will be taken care of later. However, there's a couple of places where we do those checks as 'store fd_file(...) into a variable, then check if this variable is NULL' and those are harder to spot. Get rid of those now. 
use fd_empty() instead of extracting file and then checking it for NULL. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/remap_range.c | 5 ++--- kernel/module/main.c | 4 +++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel/module') diff --git a/fs/remap_range.c b/fs/remap_range.c index 4403d5c68fcb..017d0d1ea6c9 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -537,9 +537,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) for (i = 0, info = same->info; i < count; i++, info++) { struct fd dst_fd = fdget(info->dest_fd); - struct file *dst_file = fd_file(dst_fd); - if (!dst_file) { + if (fd_empty(dst_fd)) { info->status = -EBADF; goto next_loop; } @@ -549,7 +548,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) goto next_fdput; } - deduped = vfs_dedupe_file_range_one(file, off, dst_file, + deduped = vfs_dedupe_file_range_one(file, off, fd_file(dst_fd), info->dest_offset, len, REMAP_FILE_CAN_SHORTEN); if (deduped == -EBADE) diff --git a/kernel/module/main.c b/kernel/module/main.c index 49b9bca9de12..d785973d8a51 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3202,7 +3202,7 @@ static int idempotent_init_module(struct file *f, const char __user * uargs, int { struct idempotent idem; - if (!f || !(f->f_mode & FMODE_READ)) + if (!(f->f_mode & FMODE_READ)) return -EBADF; /* Are we the winners of the race and get to do this? 
*/ @@ -3234,6 +3234,8 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) return -EINVAL; f = fdget(fd); + if (fd_empty(f)) + return -EBADF; err = idempotent_init_module(fd_file(f), uargs, flags); fdput(f); return err; -- cgit v1.2.3 From 8152f8201088350c76bb9685cd5990dd51d59aff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Jul 2024 21:19:02 -0400 Subject: fdget(), more trivial conversions all failure exits prior to fdget() leave the scope, all matching fdput() are immediately followed by leaving the scope. [xfs_ioc_commit_range() chunk moved here as well] Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- drivers/infiniband/core/ucma.c | 19 +++------- drivers/vfio/group.c | 6 +-- fs/eventpoll.c | 15 ++------ fs/ext4/ioctl.c | 21 ++++------- fs/f2fs/file.c | 15 +++----- fs/fsopen.c | 19 +++------- fs/fuse/dev.c | 6 +-- fs/locks.c | 15 +++----- fs/namespace.c | 47 ++++++++--------------- fs/notify/fanotify/fanotify_user.c | 29 ++++++--------- fs/notify/inotify/inotify_user.c | 21 ++++------- fs/ocfs2/cluster/heartbeat.c | 13 +++---- fs/open.c | 12 ++---- fs/read_write.c | 71 +++++++++++++---------------------- fs/splice.c | 45 +++++++++------------- fs/utimes.c | 11 ++---- fs/xfs/xfs_exchrange.c | 18 +++------ fs/xfs/xfs_ioctl.c | 69 +++++++++++----------------------- ipc/mqueue.c | 76 ++++++++++++-------------------------- kernel/module/main.c | 11 ++---- kernel/pid.c | 13 ++----- kernel/signal.c | 29 +++++---------- kernel/taskstats.c | 18 +++------ security/integrity/ima/ima_main.c | 7 +--- security/loadpin/loadpin.c | 8 +--- virt/kvm/vfio.c | 6 +-- 26 files changed, 202 insertions(+), 418 deletions(-) (limited to 'kernel/module') diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 5dbb248e9625..02f1666f3cba 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1615,7 +1615,6 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, struct ucma_event 
*uevent, *tmp; struct ucma_context *ctx; LIST_HEAD(event_list); - struct fd f; struct ucma_file *cur_file; int ret = 0; @@ -1623,21 +1622,17 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, return -EFAULT; /* Get current fd to protect against it being closed */ - f = fdget(cmd.fd); - if (!fd_file(f)) + CLASS(fd, f)(cmd.fd); + if (fd_empty(f)) return -ENOENT; - if (fd_file(f)->f_op != &ucma_fops) { - ret = -EINVAL; - goto file_put; - } + if (fd_file(f)->f_op != &ucma_fops) + return -EINVAL; cur_file = fd_file(f)->private_data; /* Validate current fd and prevent destruction of id. */ ctx = ucma_get_ctx(cur_file, cmd.id); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto file_put; - } + if (IS_ERR(ctx)) + return PTR_ERR(ctx); rdma_lock_handler(ctx->cm_id); /* @@ -1678,8 +1673,6 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, err_unlock: rdma_unlock_handler(ctx->cm_id); ucma_put_ctx(ctx); -file_put: - fdput(f); return ret; } diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index 95b336de8a17..49559605177e 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -104,15 +104,14 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, { struct vfio_container *container; struct iommufd_ctx *iommufd; - struct fd f; int ret; int fd; if (get_user(fd, arg)) return -EFAULT; - f = fdget(fd); - if (!fd_file(f)) + CLASS(fd, f)(fd); + if (fd_empty(f)) return -EBADF; mutex_lock(&group->group_lock); @@ -153,7 +152,6 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, out_unlock: mutex_unlock(&group->group_lock); - fdput(f); return ret; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 4607dcbc2851..7873d75a43cb 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2415,8 +2415,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, static int do_epoll_wait(int epfd, struct epoll_event __user *events, int maxevents, struct timespec64 *to) { - int error; - struct fd f; struct eventpoll *ep; /* The 
maximum number of event must be greater than zero */ @@ -2428,17 +2426,16 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events, return -EFAULT; /* Get the "struct file *" for the eventpoll file */ - f = fdget(epfd); - if (!fd_file(f)) + CLASS(fd, f)(epfd); + if (fd_empty(f)) return -EBADF; /* * We have to check that the file structure underneath the fd * the user passed to us _is_ an eventpoll file. */ - error = -EINVAL; if (!is_file_epoll(fd_file(f))) - goto error_fput; + return -EINVAL; /* * At this point it is safe to assume that the "private_data" contains @@ -2447,11 +2444,7 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events, ep = fd_file(f)->private_data; /* Time to fish for events ... */ - error = ep_poll(ep, events, maxevents, to); - -error_fput: - fdput(f); - return error; + return ep_poll(ep, events, maxevents, to); } SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 1c77400bd88e..7b9ce71c1c81 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1330,7 +1330,6 @@ group_extend_out: case EXT4_IOC_MOVE_EXT: { struct move_extent me; - struct fd donor; int err; if (!(filp->f_mode & FMODE_READ) || @@ -1342,30 +1341,26 @@ group_extend_out: return -EFAULT; me.moved_len = 0; - donor = fdget(me.donor_fd); - if (!fd_file(donor)) + CLASS(fd, donor)(me.donor_fd); + if (fd_empty(donor)) return -EBADF; - if (!(fd_file(donor)->f_mode & FMODE_WRITE)) { - err = -EBADF; - goto mext_out; - } + if (!(fd_file(donor)->f_mode & FMODE_WRITE)) + return -EBADF; if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online defrag not supported with bigalloc"); - err = -EOPNOTSUPP; - goto mext_out; + return -EOPNOTSUPP; } else if (IS_DAX(inode)) { ext4_msg(sb, KERN_ERR, "Online defrag not supported with DAX"); - err = -EOPNOTSUPP; - goto mext_out; + return -EOPNOTSUPP; } err = mnt_want_write_file(filp); if (err) - goto mext_out; + return err; err = 
ext4_move_extents(filp, fd_file(donor), me.orig_start, me.donor_start, me.len, &me.moved_len); @@ -1374,8 +1369,6 @@ group_extend_out: if (copy_to_user((struct move_extent __user *)arg, &me, sizeof(me))) err = -EFAULT; -mext_out: - fdput(donor); return err; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9ae54c4c72fe..8ba0b6d47c8c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3038,32 +3038,27 @@ out: static int __f2fs_ioc_move_range(struct file *filp, struct f2fs_move_range *range) { - struct fd dst; int err; if (!(filp->f_mode & FMODE_READ) || !(filp->f_mode & FMODE_WRITE)) return -EBADF; - dst = fdget(range->dst_fd); - if (!fd_file(dst)) + CLASS(fd, dst)(range->dst_fd); + if (fd_empty(dst)) return -EBADF; - if (!(fd_file(dst)->f_mode & FMODE_WRITE)) { - err = -EBADF; - goto err_out; - } + if (!(fd_file(dst)->f_mode & FMODE_WRITE)) + return -EBADF; err = mnt_want_write_file(filp); if (err) - goto err_out; + return err; err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst), range->pos_out, range->len); mnt_drop_write_file(filp); -err_out: - fdput(dst); return err; } diff --git a/fs/fsopen.c b/fs/fsopen.c index 6cef3deccded..094a7f510edf 100644 --- a/fs/fsopen.c +++ b/fs/fsopen.c @@ -349,7 +349,6 @@ SYSCALL_DEFINE5(fsconfig, int, aux) { struct fs_context *fc; - struct fd f; int ret; int lookup_flags = 0; @@ -392,12 +391,11 @@ SYSCALL_DEFINE5(fsconfig, return -EOPNOTSUPP; } - f = fdget(fd); - if (!fd_file(f)) + CLASS(fd, f)(fd); + if (fd_empty(f)) return -EBADF; - ret = -EINVAL; if (fd_file(f)->f_op != &fscontext_fops) - goto out_f; + return -EINVAL; fc = fd_file(f)->private_data; if (fc->ops == &legacy_fs_context_ops) { @@ -407,17 +405,14 @@ SYSCALL_DEFINE5(fsconfig, case FSCONFIG_SET_PATH_EMPTY: case FSCONFIG_SET_FD: case FSCONFIG_CMD_CREATE_EXCL: - ret = -EOPNOTSUPP; - goto out_f; + return -EOPNOTSUPP; } } if (_key) { param.key = strndup_user(_key, 256); - if (IS_ERR(param.key)) { - ret = PTR_ERR(param.key); - goto out_f; - } + if 
(IS_ERR(param.key)) + return PTR_ERR(param.key); } switch (cmd) { @@ -496,7 +491,5 @@ SYSCALL_DEFINE5(fsconfig, } out_key: kfree(param.key); -out_f: - fdput(f); return ret; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 1f64ae6d7a69..0723c6344b20 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2371,13 +2371,12 @@ static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp) int res; int oldfd; struct fuse_dev *fud = NULL; - struct fd f; if (get_user(oldfd, argp)) return -EFAULT; - f = fdget(oldfd); - if (!fd_file(f)) + CLASS(fd, f)(oldfd); + if (fd_empty(f)) return -EINVAL; /* @@ -2394,7 +2393,6 @@ static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp) mutex_unlock(&fuse_mutex); } - fdput(f); return res; } diff --git a/fs/locks.c b/fs/locks.c index 204847628f3e..25afc8d9c9d1 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2136,7 +2136,6 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) { int can_sleep, error, type; struct file_lock fl; - struct fd f; /* * LOCK_MAND locks were broken for a long time in that they never @@ -2155,19 +2154,18 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) if (type < 0) return type; - error = -EBADF; - f = fdget(fd); - if (!fd_file(f)) - return error; + CLASS(fd, f)(fd); + if (fd_empty(f)) + return -EBADF; if (type != F_UNLCK && !(fd_file(f)->f_mode & (FMODE_READ | FMODE_WRITE))) - goto out_putf; + return -EBADF; flock_make_lock(fd_file(f), &fl, type); error = security_file_lock(fd_file(f), fl.c.flc_type); if (error) - goto out_putf; + return error; can_sleep = !(cmd & LOCK_NB); if (can_sleep) @@ -2181,9 +2179,6 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) error = locks_lock_file_wait(fd_file(f), &fl); locks_release_private(&fl); - out_putf: - fdput(f); - return error; } diff --git a/fs/namespace.c b/fs/namespace.c index 93c377816d75..d2eccbdd0439 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4105,7 +4105,6 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, 
unsigned int, flags, struct file *file; struct path newmount; struct mount *mnt; - struct fd f; unsigned int mnt_flags = 0; long ret; @@ -4133,19 +4132,18 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, return -EINVAL; } - f = fdget(fs_fd); - if (!fd_file(f)) + CLASS(fd, f)(fs_fd); + if (fd_empty(f)) return -EBADF; - ret = -EINVAL; if (fd_file(f)->f_op != &fscontext_fops) - goto err_fsfd; + return -EINVAL; fc = fd_file(f)->private_data; ret = mutex_lock_interruptible(&fc->uapi_mutex); if (ret < 0) - goto err_fsfd; + return ret; /* There must be a valid superblock or we can't mount it */ ret = -EINVAL; @@ -4212,8 +4210,6 @@ err_path: path_put(&newmount); err_unlock: mutex_unlock(&fc->uapi_mutex); -err_fsfd: - fdput(f); return ret; } @@ -4668,10 +4664,8 @@ out: static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, struct mount_kattr *kattr, unsigned int flags) { - int err = 0; struct ns_common *ns; struct user_namespace *mnt_userns; - struct fd f; if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP)) return 0; @@ -4687,20 +4681,16 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, if (attr->userns_fd > INT_MAX) return -EINVAL; - f = fdget(attr->userns_fd); - if (!fd_file(f)) + CLASS(fd, f)(attr->userns_fd); + if (fd_empty(f)) return -EBADF; - if (!proc_ns_file(fd_file(f))) { - err = -EINVAL; - goto out_fput; - } + if (!proc_ns_file(fd_file(f))) + return -EINVAL; ns = get_proc_ns(file_inode(fd_file(f))); - if (ns->ops->type != CLONE_NEWUSER) { - err = -EINVAL; - goto out_fput; - } + if (ns->ops->type != CLONE_NEWUSER) + return -EINVAL; /* * The initial idmapping cannot be used to create an idmapped @@ -4711,22 +4701,15 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, * result. 
*/ mnt_userns = container_of(ns, struct user_namespace, ns); - if (mnt_userns == &init_user_ns) { - err = -EPERM; - goto out_fput; - } + if (mnt_userns == &init_user_ns) + return -EPERM; /* We're not controlling the target namespace. */ - if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) { - err = -EPERM; - goto out_fput; - } + if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) + return -EPERM; kattr->mnt_userns = get_user_ns(mnt_userns); - -out_fput: - fdput(f); - return err; + return 0; } static int build_mount_kattr(const struct mount_attr *attr, size_t usize, diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 07c5ffc8523b..e19b28b44805 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1677,7 +1677,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct inode *inode = NULL; struct vfsmount *mnt = NULL; struct fsnotify_group *group; - struct fd f; struct path path; struct fan_fsid __fsid, *fsid = NULL; u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; @@ -1747,14 +1746,13 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, umask = FANOTIFY_EVENT_FLAGS; } - f = fdget(fanotify_fd); - if (unlikely(!fd_file(f))) + CLASS(fd, f)(fanotify_fd); + if (fd_empty(f)) return -EBADF; /* verify that this is indeed an fanotify instance */ - ret = -EINVAL; if (unlikely(fd_file(f)->f_op != &fanotify_fops)) - goto fput_and_out; + return -EINVAL; group = fd_file(f)->private_data; /* @@ -1762,23 +1760,21 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, * marks. This also includes setting up such marks by a group that * was initialized by an unprivileged user. */ - ret = -EPERM; if ((!capable(CAP_SYS_ADMIN) || FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) && mark_type != FAN_MARK_INODE) - goto fput_and_out; + return -EPERM; /* * Permission events require minimum priority FAN_CLASS_CONTENT. 
*/ - ret = -EINVAL; if (mask & FANOTIFY_PERM_EVENTS && group->priority < FSNOTIFY_PRIO_CONTENT) - goto fput_and_out; + return -EINVAL; if (mask & FAN_FS_ERROR && mark_type != FAN_MARK_FILESYSTEM) - goto fput_and_out; + return -EINVAL; /* * Evictable is only relevant for inode marks, because only inode object @@ -1786,7 +1782,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, */ if (flags & FAN_MARK_EVICTABLE && mark_type != FAN_MARK_INODE) - goto fput_and_out; + return -EINVAL; /* * Events that do not carry enough information to report @@ -1798,7 +1794,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) && (!fid_mode || mark_type == FAN_MARK_MOUNT)) - goto fput_and_out; + return -EINVAL; /* * FAN_RENAME uses special info type records to report the old and @@ -1806,23 +1802,22 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, * useful and was not implemented. 
*/ if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME)) - goto fput_and_out; + return -EINVAL; if (mark_cmd == FAN_MARK_FLUSH) { - ret = 0; if (mark_type == FAN_MARK_MOUNT) fsnotify_clear_vfsmount_marks_by_group(group); else if (mark_type == FAN_MARK_FILESYSTEM) fsnotify_clear_sb_marks_by_group(group); else fsnotify_clear_inode_marks_by_group(group); - goto fput_and_out; + return 0; } ret = fanotify_find_path(dfd, pathname, &path, flags, (mask & ALL_FSNOTIFY_EVENTS), obj_type); if (ret) - goto fput_and_out; + return ret; if (mark_cmd == FAN_MARK_ADD) { ret = fanotify_events_supported(group, &path, mask, flags); @@ -1901,8 +1896,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, path_put_and_out: path_put(&path); -fput_and_out: - fdput(f); return ret; } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index dc645af2a6ad..e0c48956608a 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -732,7 +732,6 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, struct fsnotify_group *group; struct inode *inode; struct path path; - struct fd f; int ret; unsigned flags = 0; @@ -752,21 +751,17 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, if (unlikely(!(mask & ALL_INOTIFY_BITS))) return -EINVAL; - f = fdget(fd); - if (unlikely(!fd_file(f))) + CLASS(fd, f)(fd); + if (fd_empty(f)) return -EBADF; /* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */ - if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) { - ret = -EINVAL; - goto fput_and_out; - } + if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) + return -EINVAL; /* verify that this is indeed an inotify instance */ - if (unlikely(fd_file(f)->f_op != &inotify_fops)) { - ret = -EINVAL; - goto fput_and_out; - } + if (unlikely(fd_file(f)->f_op != &inotify_fops)) + return -EINVAL; if (!(mask & IN_DONT_FOLLOW)) flags |= LOOKUP_FOLLOW; @@ -776,7 
+771,7 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, ret = inotify_find_inode(pathname, &path, flags, (mask & IN_ALL_EVENTS)); if (ret) - goto fput_and_out; + return ret; /* inode held in place by reference to path; group by fget on fd */ inode = path.dentry->d_inode; @@ -785,8 +780,6 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, /* create/update an inode mark */ ret = inotify_update_watch(group, inode, mask); path_put(&path); -fput_and_out: - fdput(f); return ret; } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index bc55340a60c3..4200a0341343 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1765,7 +1765,6 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, long fd; int sectsize; char *p = (char *)page; - struct fd f; ssize_t ret = -EINVAL; int live_threshold; @@ -1784,23 +1783,23 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, if (fd < 0 || fd >= INT_MAX) return -EINVAL; - f = fdget(fd); - if (fd_file(f) == NULL) + CLASS(fd, f)(fd); + if (fd_empty(f)) return -EINVAL; if (reg->hr_blocks == 0 || reg->hr_start_block == 0 || reg->hr_block_bytes == 0) - goto out2; + return -EINVAL; if (!S_ISBLK(fd_file(f)->f_mapping->host->i_mode)) - goto out2; + return -EINVAL; reg->hr_bdev_file = bdev_file_open_by_dev(fd_file(f)->f_mapping->host->i_rdev, BLK_OPEN_WRITE | BLK_OPEN_READ, NULL, NULL); if (IS_ERR(reg->hr_bdev_file)) { ret = PTR_ERR(reg->hr_bdev_file); reg->hr_bdev_file = NULL; - goto out2; + return ret; } sectsize = bdev_logical_block_size(reg_bdev(reg)); @@ -1906,8 +1905,6 @@ out3: fput(reg->hr_bdev_file); reg->hr_bdev_file = NULL; } -out2: - fdput(f); return ret; } diff --git a/fs/open.c b/fs/open.c index 24d22f4222f0..33468aaa5311 100644 --- a/fs/open.c +++ b/fs/open.c @@ -187,19 +187,13 @@ long do_ftruncate(struct file *file, loff_t length, int small) long do_sys_ftruncate(unsigned int fd, loff_t length, int 
small) { - struct fd f; - int error; - if (length < 0) return -EINVAL; - f = fdget(fd); - if (!fd_file(f)) + CLASS(fd, f)(fd); + if (fd_empty(f)) return -EBADF; - error = do_ftruncate(fd_file(f), length, small); - - fdput(f); - return error; + return do_ftruncate(fd_file(f), length, small); } SYSCALL_DEFINE2(ftruncate, unsigned int, fd, off_t, length) diff --git a/fs/read_write.c b/fs/read_write.c index 5e3df2d39283..deb87457aa76 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -745,21 +745,17 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, loff_t pos) { - struct fd f; - ssize_t ret = -EBADF; - if (pos < 0) return -EINVAL; - f = fdget(fd); - if (fd_file(f)) { - ret = -ESPIPE; - if (fd_file(f)->f_mode & FMODE_PREAD) - ret = vfs_read(fd_file(f), buf, count, &pos); - fdput(f); - } + CLASS(fd, f)(fd); + if (fd_empty(f)) + return -EBADF; - return ret; + if (fd_file(f)->f_mode & FMODE_PREAD) + return vfs_read(fd_file(f), buf, count, &pos); + + return -ESPIPE; } SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, @@ -779,21 +775,17 @@ COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, buf, ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos) { - struct fd f; - ssize_t ret = -EBADF; - if (pos < 0) return -EINVAL; - f = fdget(fd); - if (fd_file(f)) { - ret = -ESPIPE; - if (fd_file(f)->f_mode & FMODE_PWRITE) - ret = vfs_write(fd_file(f), buf, count, &pos); - fdput(f); - } + CLASS(fd, f)(fd); + if (fd_empty(f)) + return -EBADF; - return ret; + if (fd_file(f)->f_mode & FMODE_PWRITE) + return vfs_write(fd_file(f), buf, count, &pos); + + return -ESPIPE; } SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, @@ -1307,7 +1299,6 @@ COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, loff_t max) { - struct fd in, out; struct inode 
*in_inode, *out_inode; struct pipe_inode_info *opipe; loff_t pos; @@ -1318,35 +1309,32 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, /* * Get input file, and verify that it is ok.. */ - retval = -EBADF; - in = fdget(in_fd); - if (!fd_file(in)) - goto out; + CLASS(fd, in)(in_fd); + if (fd_empty(in)) + return -EBADF; if (!(fd_file(in)->f_mode & FMODE_READ)) - goto fput_in; - retval = -ESPIPE; + return -EBADF; if (!ppos) { pos = fd_file(in)->f_pos; } else { pos = *ppos; if (!(fd_file(in)->f_mode & FMODE_PREAD)) - goto fput_in; + return -ESPIPE; } retval = rw_verify_area(READ, fd_file(in), &pos, count); if (retval < 0) - goto fput_in; + return retval; if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; /* * Get output file, and verify that it is ok.. */ - retval = -EBADF; - out = fdget(out_fd); - if (!fd_file(out)) - goto fput_in; + CLASS(fd, out)(out_fd); + if (fd_empty(out)) + return -EBADF; if (!(fd_file(out)->f_mode & FMODE_WRITE)) - goto fput_out; + return -EBADF; in_inode = file_inode(fd_file(in)); out_inode = file_inode(fd_file(out)); out_pos = fd_file(out)->f_pos; @@ -1355,9 +1343,8 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); if (unlikely(pos + count > max)) { - retval = -EOVERFLOW; if (pos >= max) - goto fput_out; + return -EOVERFLOW; count = max - pos; } @@ -1376,7 +1363,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, if (!opipe) { retval = rw_verify_area(WRITE, fd_file(out), &out_pos, count); if (retval < 0) - goto fput_out; + return retval; retval = do_splice_direct(fd_file(in), &pos, fd_file(out), &out_pos, count, fl); } else { @@ -1402,12 +1389,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, inc_syscw(current); if (pos > max) retval = -EOVERFLOW; - -fput_out: - fdput(out); -fput_in: - fdput(in); -out: return retval; } diff --git a/fs/splice.c b/fs/splice.c index 29cd39d7f4a0..2898fa1e9e63 100644 --- 
a/fs/splice.c +++ b/fs/splice.c @@ -1622,27 +1622,22 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags) { - struct fd in, out; - ssize_t error; - if (unlikely(!len)) return 0; if (unlikely(flags & ~SPLICE_F_ALL)) return -EINVAL; - error = -EBADF; - in = fdget(fd_in); - if (fd_file(in)) { - out = fdget(fd_out); - if (fd_file(out)) { - error = __do_splice(fd_file(in), off_in, fd_file(out), off_out, + CLASS(fd, in)(fd_in); + if (fd_empty(in)) + return -EBADF; + + CLASS(fd, out)(fd_out); + if (fd_empty(out)) + return -EBADF; + + return __do_splice(fd_file(in), off_in, fd_file(out), off_out, len, flags); - fdput(out); - } - fdput(in); - } - return error; } /* @@ -1992,25 +1987,19 @@ ssize_t do_tee(struct file *in, struct file *out, size_t len, SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) { - struct fd in, out; - ssize_t error; - if (unlikely(flags & ~SPLICE_F_ALL)) return -EINVAL; if (unlikely(!len)) return 0; - error = -EBADF; - in = fdget(fdin); - if (fd_file(in)) { - out = fdget(fdout); - if (fd_file(out)) { - error = do_tee(fd_file(in), fd_file(out), len, flags); - fdput(out); - } - fdput(in); - } + CLASS(fd, in)(fdin); + if (fd_empty(in)) + return -EBADF; - return error; + CLASS(fd, out)(fdout); + if (fd_empty(out)) + return -EBADF; + + return do_tee(fd_file(in), fd_file(out), len, flags); } diff --git a/fs/utimes.c b/fs/utimes.c index 99b26f792b89..c7c7958e57b2 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -108,18 +108,13 @@ retry: static int do_utimes_fd(int fd, struct timespec64 *times, int flags) { - struct fd f; - int error; - if (flags) return -EINVAL; - f = fdget(fd); - if (!fd_file(f)) + CLASS(fd, f)(fd); + if (fd_empty(f)) return -EBADF; - error = vfs_utimes(&fd_file(f)->f_path, times); - fdput(f); - return error; + return vfs_utimes(&fd_file(f)->f_path, times); } /* diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index 
75cb53f090d1..fa29c8b334d2 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -813,8 +813,6 @@ xfs_ioc_exchange_range( .file2 = file, }; struct xfs_exchange_range args; - struct fd file1; - int error; if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; @@ -828,14 +826,12 @@ xfs_ioc_exchange_range( fxr.length = args.length; fxr.flags = args.flags; - file1 = fdget(args.file1_fd); - if (!fd_file(file1)) + CLASS(fd, file1)(args.file1_fd); + if (fd_empty(file1)) return -EBADF; fxr.file1 = fd_file(file1); - error = xfs_exchange_range(&fxr); - fdput(file1); - return error; + return xfs_exchange_range(&fxr); } /* Opaque freshness blob for XFS_IOC_COMMIT_RANGE */ @@ -909,8 +905,6 @@ xfs_ioc_commit_range( struct xfs_commit_range_fresh *kern_f; struct xfs_inode *ip2 = XFS_I(file_inode(file)); struct xfs_mount *mp = ip2->i_mount; - struct fd file1; - int error; kern_f = (struct xfs_commit_range_fresh *)&args.file2_freshness; @@ -934,12 +928,10 @@ xfs_ioc_commit_range( fxr.file2_ctime.tv_sec = kern_f->file2_ctime; fxr.file2_ctime.tv_nsec = kern_f->file2_ctime_nsec; - file1 = fdget(args.file1_fd); + CLASS(fd, file1)(args.file1_fd); if (fd_empty(file1)) return -EBADF; fxr.file1 = fd_file(file1); - error = xfs_exchange_range(&fxr); - fdput(file1); - return error; + return xfs_exchange_range(&fxr); } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index a20d426ef021..a24fcdc8ad4f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -881,41 +881,29 @@ xfs_ioc_swapext( xfs_swapext_t *sxp) { xfs_inode_t *ip, *tip; - struct fd f, tmp; - int error = 0; /* Pull information for the target fd */ - f = fdget((int)sxp->sx_fdtarget); - if (!fd_file(f)) { - error = -EINVAL; - goto out; - } + CLASS(fd, f)((int)sxp->sx_fdtarget); + if (fd_empty(f)) + return -EINVAL; if (!(fd_file(f)->f_mode & FMODE_WRITE) || !(fd_file(f)->f_mode & FMODE_READ) || - (fd_file(f)->f_flags & O_APPEND)) { - error = -EBADF; - goto out_put_file; - } + (fd_file(f)->f_flags & 
O_APPEND)) + return -EBADF; - tmp = fdget((int)sxp->sx_fdtmp); - if (!fd_file(tmp)) { - error = -EINVAL; - goto out_put_file; - } + CLASS(fd, tmp)((int)sxp->sx_fdtmp); + if (fd_empty(tmp)) + return -EINVAL; if (!(fd_file(tmp)->f_mode & FMODE_WRITE) || !(fd_file(tmp)->f_mode & FMODE_READ) || - (fd_file(tmp)->f_flags & O_APPEND)) { - error = -EBADF; - goto out_put_tmp_file; - } + (fd_file(tmp)->f_flags & O_APPEND)) + return -EBADF; if (IS_SWAPFILE(file_inode(fd_file(f))) || - IS_SWAPFILE(file_inode(fd_file(tmp)))) { - error = -EINVAL; - goto out_put_tmp_file; - } + IS_SWAPFILE(file_inode(fd_file(tmp)))) + return -EINVAL; /* * We need to ensure that the fds passed in point to XFS inodes @@ -923,37 +911,22 @@ xfs_ioc_swapext( * control over what the user passes us here. */ if (fd_file(f)->f_op != &xfs_file_operations || - fd_file(tmp)->f_op != &xfs_file_operations) { - error = -EINVAL; - goto out_put_tmp_file; - } + fd_file(tmp)->f_op != &xfs_file_operations) + return -EINVAL; ip = XFS_I(file_inode(fd_file(f))); tip = XFS_I(file_inode(fd_file(tmp))); - if (ip->i_mount != tip->i_mount) { - error = -EINVAL; - goto out_put_tmp_file; - } - - if (ip->i_ino == tip->i_ino) { - error = -EINVAL; - goto out_put_tmp_file; - } + if (ip->i_mount != tip->i_mount) + return -EINVAL; - if (xfs_is_shutdown(ip->i_mount)) { - error = -EIO; - goto out_put_tmp_file; - } + if (ip->i_ino == tip->i_ino) + return -EINVAL; - error = xfs_swap_extents(ip, tip, sxp); + if (xfs_is_shutdown(ip->i_mount)) + return -EIO; - out_put_tmp_file: - fdput(tmp); - out_put_file: - fdput(f); - out: - return error; + return xfs_swap_extents(ip, tip, sxp); } static int diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 4f1dec518fae..35b4f8659904 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -1063,7 +1063,6 @@ static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr, size_t msg_len, unsigned int msg_prio, struct timespec64 *ts) { - struct fd f; struct inode *inode; struct ext_wait_queue wait; struct 
ext_wait_queue *receiver; @@ -1084,37 +1083,27 @@ static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr, audit_mq_sendrecv(mqdes, msg_len, msg_prio, ts); - f = fdget(mqdes); - if (unlikely(!fd_file(f))) { - ret = -EBADF; - goto out; - } + CLASS(fd, f)(mqdes); + if (fd_empty(f)) + return -EBADF; inode = file_inode(fd_file(f)); - if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) { - ret = -EBADF; - goto out_fput; - } + if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) + return -EBADF; info = MQUEUE_I(inode); audit_file(fd_file(f)); - if (unlikely(!(fd_file(f)->f_mode & FMODE_WRITE))) { - ret = -EBADF; - goto out_fput; - } + if (unlikely(!(fd_file(f)->f_mode & FMODE_WRITE))) + return -EBADF; - if (unlikely(msg_len > info->attr.mq_msgsize)) { - ret = -EMSGSIZE; - goto out_fput; - } + if (unlikely(msg_len > info->attr.mq_msgsize)) + return -EMSGSIZE; /* First try to allocate memory, before doing anything with * existing queues. */ msg_ptr = load_msg(u_msg_ptr, msg_len); - if (IS_ERR(msg_ptr)) { - ret = PTR_ERR(msg_ptr); - goto out_fput; - } + if (IS_ERR(msg_ptr)) + return PTR_ERR(msg_ptr); msg_ptr->m_ts = msg_len; msg_ptr->m_type = msg_prio; @@ -1172,9 +1161,6 @@ out_unlock: out_free: if (ret) free_msg(msg_ptr); -out_fput: - fdput(f); -out: return ret; } @@ -1184,7 +1170,6 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, { ssize_t ret; struct msg_msg *msg_ptr; - struct fd f; struct inode *inode; struct mqueue_inode_info *info; struct ext_wait_queue wait; @@ -1198,30 +1183,22 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, audit_mq_sendrecv(mqdes, msg_len, 0, ts); - f = fdget(mqdes); - if (unlikely(!fd_file(f))) { - ret = -EBADF; - goto out; - } + CLASS(fd, f)(mqdes); + if (fd_empty(f)) + return -EBADF; inode = file_inode(fd_file(f)); - if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) { - ret = -EBADF; - goto out_fput; - } + if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) + 
return -EBADF; info = MQUEUE_I(inode); audit_file(fd_file(f)); - if (unlikely(!(fd_file(f)->f_mode & FMODE_READ))) { - ret = -EBADF; - goto out_fput; - } + if (unlikely(!(fd_file(f)->f_mode & FMODE_READ))) + return -EBADF; /* checks if buffer is big enough */ - if (unlikely(msg_len < info->attr.mq_msgsize)) { - ret = -EMSGSIZE; - goto out_fput; - } + if (unlikely(msg_len < info->attr.mq_msgsize)) + return -EMSGSIZE; /* * msg_insert really wants us to have a valid, spare node struct so @@ -1275,9 +1252,6 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, } free_msg(msg_ptr); } -out_fput: - fdput(f); -out: return ret; } @@ -1437,21 +1411,18 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, static int do_mq_getsetattr(int mqdes, struct mq_attr *new, struct mq_attr *old) { - struct fd f; struct inode *inode; struct mqueue_inode_info *info; if (new && (new->mq_flags & (~O_NONBLOCK))) return -EINVAL; - f = fdget(mqdes); - if (!fd_file(f)) + CLASS(fd, f)(mqdes); + if (fd_empty(f)) return -EBADF; - if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) { - fdput(f); + if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) return -EBADF; - } inode = file_inode(fd_file(f)); info = MQUEUE_I(inode); @@ -1475,7 +1446,6 @@ static int do_mq_getsetattr(int mqdes, struct mq_attr *new, struct mq_attr *old) } spin_unlock(&info->lock); - fdput(f); return 0; } diff --git a/kernel/module/main.c b/kernel/module/main.c index d785973d8a51..4490924fe24e 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3219,10 +3219,7 @@ static int idempotent_init_module(struct file *f, const char __user * uargs, int SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) { - int err; - struct fd f; - - err = may_init_module(); + int err = may_init_module(); if (err) return err; @@ -3233,12 +3230,10 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) |MODULE_INIT_COMPRESSED_FILE)) return -EINVAL; - f = fdget(fd); + 
CLASS(fd, f)(fd); if (fd_empty(f)) return -EBADF; - err = idempotent_init_module(fd_file(f), uargs, flags); - fdput(f); - return err; + return idempotent_init_module(fd_file(f), uargs, flags); } /* Keep in sync with MODULE_FLAGS_BUF_SIZE !!! */ diff --git a/kernel/pid.c b/kernel/pid.c index b5bbc1a8a6e4..115448e89c3e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -744,23 +744,18 @@ SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd, unsigned int, flags) { struct pid *pid; - struct fd f; - int ret; /* flags is currently unused - make sure it's unset */ if (flags) return -EINVAL; - f = fdget(pidfd); - if (!fd_file(f)) + CLASS(fd, f)(pidfd); + if (fd_empty(f)) return -EBADF; pid = pidfd_pid(fd_file(f)); if (IS_ERR(pid)) - ret = PTR_ERR(pid); - else - ret = pidfd_getfd(pid, fd); + return PTR_ERR(pid); - fdput(f); - return ret; + return pidfd_getfd(pid, fd); } diff --git a/kernel/signal.c b/kernel/signal.c index 4344860ffcac..6be807ecb94c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3908,7 +3908,6 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, siginfo_t __user *, info, unsigned int, flags) { int ret; - struct fd f; struct pid *pid; kernel_siginfo_t kinfo; enum pid_type type; @@ -3921,20 +3920,17 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1) return -EINVAL; - f = fdget(pidfd); - if (!fd_file(f)) + CLASS(fd, f)(pidfd); + if (fd_empty(f)) return -EBADF; /* Is this a pidfd? 
*/ pid = pidfd_to_pid(fd_file(f)); - if (IS_ERR(pid)) { - ret = PTR_ERR(pid); - goto err; - } + if (IS_ERR(pid)) + return PTR_ERR(pid); - ret = -EINVAL; if (!access_pidfd_pidns(pid)) - goto err; + return -EINVAL; switch (flags) { case 0: @@ -3958,28 +3954,23 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, if (info) { ret = copy_siginfo_from_user_any(&kinfo, info); if (unlikely(ret)) - goto err; + return ret; - ret = -EINVAL; if (unlikely(sig != kinfo.si_signo)) - goto err; + return -EINVAL; /* Only allow sending arbitrary signals to yourself. */ - ret = -EPERM; if ((task_pid(current) != pid || type > PIDTYPE_TGID) && (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) - goto err; + return -EPERM; } else { prepare_kill_siginfo(sig, &kinfo, type); } if (type == PIDTYPE_PGID) - ret = kill_pgrp_info(sig, &kinfo, pid); + return kill_pgrp_info(sig, &kinfo, pid); else - ret = kill_pid_info_type(sig, &kinfo, pid, type); -err: - fdput(f); - return ret; + return kill_pid_info_type(sig, &kinfo, pid, type); } static int diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 0700f40c53ac..0cd680ccc7e5 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -411,15 +411,14 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) struct nlattr *na; size_t size; u32 fd; - struct fd f; na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; if (!na) return -EINVAL; fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); - f = fdget(fd); - if (!fd_file(f)) + CLASS(fd, f)(fd); + if (fd_empty(f)) return 0; size = nla_total_size(sizeof(struct cgroupstats)); @@ -427,14 +426,13 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) - goto err; + return rc; na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, sizeof(struct cgroupstats)); if (na == NULL) { nlmsg_free(rep_skb); - rc = -EMSGSIZE; - goto err; + return -EMSGSIZE; } stats = nla_data(na); @@ 
-443,14 +441,10 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) rc = cgroupstats_build(stats, fd_file(f)->f_path.dentry); if (rc < 0) { nlmsg_free(rep_skb); - goto err; + return rc; } - rc = send_reply(rep_skb, info); - -err: - fdput(f); - return rc; + return send_reply(rep_skb, info); } static int cmd_attr_register_cpumask(struct genl_info *info) diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 06132cf47016..db5e2dd7cec9 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -1062,19 +1062,16 @@ out: */ void ima_kexec_cmdline(int kernel_fd, const void *buf, int size) { - struct fd f; - if (!buf || !size) return; - f = fdget(kernel_fd); - if (!fd_file(f)) + CLASS(fd, f)(kernel_fd); + if (fd_empty(f)) return; process_buffer_measurement(file_mnt_idmap(fd_file(f)), file_inode(fd_file(f)), buf, size, "kexec-cmdline", KEXEC_CMDLINE, 0, NULL, false, NULL, 0); - fdput(f); } /** diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c index 02144ec39f43..68252452b66c 100644 --- a/security/loadpin/loadpin.c +++ b/security/loadpin/loadpin.c @@ -283,7 +283,6 @@ enum loadpin_securityfs_interface_index { static int read_trusted_verity_root_digests(unsigned int fd) { - struct fd f; void *data; int rc; char *p, *d; @@ -295,8 +294,8 @@ static int read_trusted_verity_root_digests(unsigned int fd) if (!list_empty(&dm_verity_loadpin_trusted_root_digests)) return -EPERM; - f = fdget(fd); - if (!fd_file(f)) + CLASS(fd, f)(fd); + if (fd_empty(f)) return -EINVAL; data = kzalloc(SZ_4K, GFP_KERNEL); @@ -359,7 +358,6 @@ static int read_trusted_verity_root_digests(unsigned int fd) } kfree(data); - fdput(f); return 0; @@ -379,8 +377,6 @@ err: /* disallow further attempts after reading a corrupt/invalid file */ deny_reading_verity_digests = true; - fdput(f); - return rc; } diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 53262b8a7656..72aa1fdeb699 100644 --- 
a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -229,14 +229,13 @@ static int kvm_vfio_file_set_spapr_tce(struct kvm_device *dev, struct kvm_vfio_spapr_tce param; struct kvm_vfio *kv = dev->private; struct kvm_vfio_file *kvf; - struct fd f; int ret; if (copy_from_user(&param, arg, sizeof(struct kvm_vfio_spapr_tce))) return -EFAULT; - f = fdget(param.groupfd); - if (!fd_file(f)) + CLASS(fd, f)(param.groupfd); + if (fd_empty(f)) return -EBADF; ret = -ENOENT; @@ -262,7 +261,6 @@ static int kvm_vfio_file_set_spapr_tce(struct kvm_device *dev, err_fdput: mutex_unlock(&kv->lock); - fdput(f); return ret; } #endif -- cgit v1.2.3 From 0c133b1e78cd34dd9d18da707dc6f46170e9129e Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 23 Oct 2024 19:27:07 +0300 Subject: module: prepare to handle ROX allocations for text In order to support ROX allocations for module text, it is necessary to handle modifications to the code, such as relocations and alternatives patching, without write access to that memory. One option is to use text patching, but this would make module loading extremely slow and will expose executable code that is not finally formed. A better way is to have memory allocated with ROX permissions contain invalid instructions and keep a writable, but not executable copy of the module text. The relocations and alternative patches would be done on the writable copy using the addresses of the ROX memory. Once the module is completely ready, the updated text will be copied to ROX memory using text patching in one go and the writable copy will be freed. Add support for that to module initialization code and provide necessary interfaces in execmem. 
Link: https://lkml.kernel.org/r/20241023162711.2579610-5-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Luis Chamberlain Tested-by: kdevops Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Petkov (AMD) Cc: Brian Cain Cc: Catalin Marinas Cc: Christophe Leroy Cc: Christoph Hellwig Cc: Dave Hansen Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Oleg Nesterov Cc: Palmer Dabbelt Cc: Peter Zijlstra Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Stafford Horne Cc: Steven Rostedt (Google) Cc: Suren Baghdasaryan Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Uladzislau Rezki (Sony) Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/execmem.h | 23 +++++++++++++ include/linux/module.h | 16 +++++++++ include/linux/moduleloader.h | 4 +++ kernel/module/debug_kmemleak.c | 3 +- kernel/module/main.c | 74 ++++++++++++++++++++++++++++++++++++++---- kernel/module/strict_rwx.c | 3 ++ mm/execmem.c | 11 +++++++ 7 files changed, 126 insertions(+), 8 deletions(-) (limited to 'kernel/module') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 32cef1144117..dfdf19f8a5e8 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -46,9 +46,11 @@ enum execmem_type { /** * enum execmem_range_flags - options for executable memory allocations * @EXECMEM_KASAN_SHADOW: allocate kasan shadow + * @EXECMEM_ROX_CACHE: allocations should use ROX cache of huge pages */ enum execmem_range_flags { EXECMEM_KASAN_SHADOW = (1 << 0), + EXECMEM_ROX_CACHE = (1 << 1), }; /** @@ -123,6 +125,27 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); +/** + * execmem_update_copy - copy an 
update to executable memory + * @dst: destination address to update + * @src: source address containing the data + * @size: how many bytes of memory should be copied + * + * Copy @size bytes from @src to @dst using text poking if the memory at + * @dst is read-only. + * + * Return: a pointer to @dst or NULL on error + */ +void *execmem_update_copy(void *dst, const void *src, size_t size); + +/** + * execmem_is_rox - check if execmem is read-only + * @type: the execmem type to check + * + * Return: %true if the @type is read-only, %false if it's writable + */ +bool execmem_is_rox(enum execmem_type type); + #if defined(CONFIG_EXECMEM) && !defined(CONFIG_ARCH_WANTS_EXECMEM_LATE) void execmem_init(void); #else diff --git a/include/linux/module.h b/include/linux/module.h index 88ecc5e9f523..2a9386cbdf85 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -367,6 +367,8 @@ enum mod_mem_type { struct module_memory { void *base; + void *rw_copy; + bool is_rox; unsigned int size; #ifdef CONFIG_MODULES_TREE_LOOKUP @@ -767,6 +769,15 @@ static inline bool is_livepatch_module(struct module *mod) void set_module_sig_enforced(void); +void *__module_writable_address(struct module *mod, void *loc); + +static inline void *module_writable_address(struct module *mod, void *loc) +{ + if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod) + return loc; + return __module_writable_address(mod, loc); +} + #else /* !CONFIG_MODULES... 
*/ static inline struct module *__module_address(unsigned long addr) @@ -874,6 +885,11 @@ static inline bool module_is_coming(struct module *mod) { return false; } + +static inline void *module_writable_address(struct module *mod, void *loc) +{ + return loc; +} #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index e395461d59e5..1f5507ba5a12 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -108,6 +108,10 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); +int module_post_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *mod); + #ifdef CONFIG_MODULES void flush_module_init_free_work(void); #else diff --git a/kernel/module/debug_kmemleak.c b/kernel/module/debug_kmemleak.c index b4cc03842d70..df873dad049d 100644 --- a/kernel/module/debug_kmemleak.c +++ b/kernel/module/debug_kmemleak.c @@ -14,7 +14,8 @@ void kmemleak_load_module(const struct module *mod, { /* only scan writable, non-executable sections */ for_each_mod_mem_type(type) { - if (type != MOD_DATA && type != MOD_INIT_DATA) + if (type != MOD_DATA && type != MOD_INIT_DATA && + !mod->mem[type].is_rox) kmemleak_no_scan(mod->mem[type].base); } } diff --git a/kernel/module/main.c b/kernel/module/main.c index 49b9bca9de12..73b588fe98d4 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1189,6 +1189,18 @@ void __weak module_arch_freeing_init(struct module *mod) { } +void *__module_writable_address(struct module *mod, void *loc) +{ + for_class_mod_mem_type(type, text) { + struct module_memory *mem = &mod->mem[type]; + + if (loc >= mem->base && loc < mem->base + mem->size) + return loc + (mem->rw_copy - mem->base); + } + + return loc; +} + static int module_memory_alloc(struct module *mod, enum mod_mem_type type) { unsigned int size = PAGE_ALIGN(mod->mem[type].size); @@ -1206,6 +1218,23 @@ static int module_memory_alloc(struct module *mod, 
enum mod_mem_type type) if (!ptr) return -ENOMEM; + mod->mem[type].base = ptr; + + if (execmem_is_rox(execmem_type)) { + ptr = vzalloc(size); + + if (!ptr) { + execmem_free(mod->mem[type].base); + return -ENOMEM; + } + + mod->mem[type].rw_copy = ptr; + mod->mem[type].is_rox = true; + } else { + mod->mem[type].rw_copy = mod->mem[type].base; + memset(mod->mem[type].base, 0, size); + } + /* * The pointer to these blocks of memory are stored on the module * structure and we keep that around so long as the module is @@ -1219,16 +1248,17 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) */ kmemleak_not_leak(ptr); - memset(ptr, 0, size); - mod->mem[type].base = ptr; - return 0; } static void module_memory_free(struct module *mod, enum mod_mem_type type, bool unload_codetags) { - void *ptr = mod->mem[type].base; + struct module_memory *mem = &mod->mem[type]; + void *ptr = mem->base; + + if (mem->is_rox) + vfree(mem->rw_copy); if (!unload_codetags && mod_mem_type_is_core_data(type)) return; @@ -2251,6 +2281,7 @@ static int move_module(struct module *mod, struct load_info *info) for_each_mod_mem_type(type) { if (!mod->mem[type].size) { mod->mem[type].base = NULL; + mod->mem[type].rw_copy = NULL; continue; } @@ -2267,11 +2298,14 @@ static int move_module(struct module *mod, struct load_info *info) void *dest; Elf_Shdr *shdr = &info->sechdrs[i]; enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; + unsigned long offset = shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK; + unsigned long addr; if (!(shdr->sh_flags & SHF_ALLOC)) continue; - dest = mod->mem[type].base + (shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK); + addr = (unsigned long)mod->mem[type].base + offset; + dest = mod->mem[type].rw_copy + offset; if (shdr->sh_type != SHT_NOBITS) { /* @@ -2293,7 +2327,7 @@ static int move_module(struct module *mod, struct load_info *info) * users of info can keep taking advantage and using the newly * minted official memory area. 
*/ - shdr->sh_addr = (unsigned long)dest; + shdr->sh_addr = addr; pr_debug("\t0x%lx 0x%.8lx %s\n", (long)shdr->sh_addr, (long)shdr->sh_size, info->secstrings + shdr->sh_name); } @@ -2441,8 +2475,17 @@ int __weak module_finalize(const Elf_Ehdr *hdr, return 0; } +int __weak module_post_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + return 0; +} + static int post_relocation(struct module *mod, const struct load_info *info) { + int ret; + /* Sort exception table now relocations are done. */ sort_extable(mod->extable, mod->extable + mod->num_exentries); @@ -2454,7 +2497,24 @@ static int post_relocation(struct module *mod, const struct load_info *info) add_kallsyms(mod, info); /* Arch-specific module finalizing. */ - return module_finalize(info->hdr, info->sechdrs, mod); + ret = module_finalize(info->hdr, info->sechdrs, mod); + if (ret) + return ret; + + for_each_mod_mem_type(type) { + struct module_memory *mem = &mod->mem[type]; + + if (mem->is_rox) { + if (!execmem_update_copy(mem->base, mem->rw_copy, + mem->size)) + return -ENOMEM; + + vfree(mem->rw_copy); + mem->rw_copy = NULL; + } + } + + return module_post_finalize(info->hdr, info->sechdrs, mod); } /* Call module constructors. 
*/ diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c index c45caa4690e5..239e5013359d 100644 --- a/kernel/module/strict_rwx.c +++ b/kernel/module/strict_rwx.c @@ -34,6 +34,9 @@ int module_enable_text_rox(const struct module *mod) for_class_mod_mem_type(type, text) { int ret; + if (mod->mem[type].is_rox) + continue; + if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) ret = module_set_memory(mod, type, set_memory_rox); else diff --git a/mm/execmem.c b/mm/execmem.c index 0c4b36bc6d10..0f6691e9ffe6 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -10,6 +10,7 @@ #include #include #include +#include static struct execmem_info *execmem_info __ro_after_init; static struct execmem_info default_execmem_info __ro_after_init; @@ -69,6 +70,16 @@ void execmem_free(void *ptr) vfree(ptr); } +void *execmem_update_copy(void *dst, const void *src, size_t size) +{ + return text_poke_copy(dst, src, size); +} + +bool execmem_is_rox(enum execmem_type type) +{ + return !!(execmem_info->ranges[type].flags & EXECMEM_ROX_CACHE); +} + static bool execmem_validate(struct execmem_info *info) { struct execmem_range *r = &info->ranges[EXECMEM_DEFAULT]; -- cgit v1.2.3 From 0db6f8d7820a4b788565dac8eed52bfc2c3216da Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 23 Oct 2024 10:07:56 -0700 Subject: alloc_tag: load module tags into separate contiguous memory When a module gets unloaded there is a possibility that some of the allocations it made are still used and therefore the allocation tags corresponding to these allocations are still referenced. As such, the memory for these tags can't be freed. This is currently handled as an abnormal situation and module's data section is not being unloaded. To handle this situation without keeping module's data in memory, allow codetags with longer lifespan than the module to be loaded into their own separate memory. The in-use memory areas and gaps after module unloading in this separate memory are tracked using maple trees. 
Allocation tags arrange their separate memory so that it is virtually contiguous and that will allow simple allocation tag indexing later on in this patchset. The size of this virtually contiguous memory is set to store up to 100000 allocation tags. [surenb@google.com: fix empty codetag module section handling] Link: https://lkml.kernel.org/r/20241101000017.3856204-1-surenb@google.com [akpm@linux-foundation.org: update comment, per Dan] Link: https://lkml.kernel.org/r/20241023170759.999909-4-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Pasha Tatashin Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Petkov (AMD) Cc: Christoph Hellwig Cc: Daniel Gomez Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: David Rientjes Cc: Dennis Zhou Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joonsoo Kim Cc: Kalesh Singh Cc: Kees Cook Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Luis Chamberlain Cc: Matthew Wilcox Cc: Michal Hocko Cc: Mike Rapoport (Microsoft) Cc: Minchan Kim Cc: Paul E. 
McKenney Cc: Petr Pavlu Cc: Roman Gushchin Cc: Sami Tolvanen Cc: Sourav Panda Cc: Steven Rostedt (Google) Cc: Thomas Gleixner Cc: Thomas Huth Cc: Uladzislau Rezki (Sony) Cc: Vlastimil Babka Cc: Xiongwei Song Cc: Yu Zhao Cc: Dan Carpenter Signed-off-by: Andrew Morton --- include/asm-generic/codetag.lds.h | 19 +++ include/linux/alloc_tag.h | 13 +- include/linux/codetag.h | 37 +++++- kernel/module/main.c | 84 +++++++++---- lib/alloc_tag.c | 249 +++++++++++++++++++++++++++++++++++--- lib/codetag.c | 100 +++++++++++++-- scripts/module.lds.S | 5 +- 7 files changed, 445 insertions(+), 62 deletions(-) (limited to 'kernel/module') diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h index 64f536b80380..372c320c5043 100644 --- a/include/asm-generic/codetag.lds.h +++ b/include/asm-generic/codetag.lds.h @@ -11,4 +11,23 @@ #define CODETAG_SECTIONS() \ SECTION_WITH_BOUNDARIES(alloc_tags) +/* + * Module codetags which aren't used after module unload, therefore have the + * same lifespan as the module and can be safely unloaded with the module. + */ +#define MOD_CODETAG_SECTIONS() + +#define MOD_SEPARATE_CODETAG_SECTION(_name) \ + .codetag.##_name : { \ + SECTION_WITH_BOUNDARIES(_name) \ + } + +/* + * For codetags which might be used after module unload, therefore might stay + * longer in memory. Each such codetag type has its own section so that we can + * unload them individually once unused. 
+ */ +#define MOD_SEPARATE_CODETAG_SECTIONS() \ + MOD_SEPARATE_CODETAG_SECTION(alloc_tags) + #endif /* __ASM_GENERIC_CODETAG_LDS_H */ diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 941deffc590d..55d30543c4c7 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -30,6 +30,13 @@ struct alloc_tag { struct alloc_tag_counters __percpu *counters; } __aligned(8); +struct alloc_tag_module_section { + unsigned long start_addr; + unsigned long end_addr; + /* used size */ + unsigned long size; +}; + #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG #define CODETAG_EMPTY ((void *)1) @@ -54,6 +61,8 @@ static inline void set_codetag_empty(union codetag_ref *ref) {} #ifdef CONFIG_MEM_ALLOC_PROFILING +#define ALLOC_TAG_SECTION_NAME "alloc_tags" + struct codetag_bytes { struct codetag *ct; s64 bytes; @@ -76,7 +85,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #define DEFINE_ALLOC_TAG(_alloc_tag) \ static struct alloc_tag _alloc_tag __used __aligned(8) \ - __section("alloc_tags") = { \ + __section(ALLOC_TAG_SECTION_NAME) = { \ .ct = CODE_TAG_INIT, \ .counters = &_shared_alloc_tag }; @@ -85,7 +94,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #define DEFINE_ALLOC_TAG(_alloc_tag) \ static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ static struct alloc_tag _alloc_tag __used __aligned(8) \ - __section("alloc_tags") = { \ + __section(ALLOC_TAG_SECTION_NAME) = { \ .ct = CODE_TAG_INIT, \ .counters = &_alloc_tag_cntr }; diff --git a/include/linux/codetag.h b/include/linux/codetag.h index c2a579ccd455..d10bd9810d32 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -35,8 +35,15 @@ struct codetag_type_desc { size_t tag_size; void (*module_load)(struct codetag_type *cttype, struct codetag_module *cmod); - bool (*module_unload)(struct codetag_type *cttype, + void (*module_unload)(struct codetag_type *cttype, struct codetag_module *cmod); +#ifdef CONFIG_MODULES + void 
(*module_replaced)(struct module *mod, struct module *new_mod); + bool (*needs_section_mem)(struct module *mod, unsigned long size); + void *(*alloc_section_mem)(struct module *mod, unsigned long size, + unsigned int prepend, unsigned long align); + void (*free_section_mem)(struct module *mod, bool used); +#endif }; struct codetag_iterator { @@ -71,11 +78,31 @@ struct codetag_type * codetag_register_type(const struct codetag_type_desc *desc); #if defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) + +bool codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size); +void *codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align); +void codetag_free_module_sections(struct module *mod); +void codetag_module_replaced(struct module *mod, struct module *new_mod); void codetag_load_module(struct module *mod); -bool codetag_unload_module(struct module *mod); -#else +void codetag_unload_module(struct module *mod); + +#else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ + +static inline bool +codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size) { return false; } +static inline void * +codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align) { return NULL; } +static inline void codetag_free_module_sections(struct module *mod) {} +static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {} static inline void codetag_load_module(struct module *mod) {} -static inline bool codetag_unload_module(struct module *mod) { return true; } -#endif +static inline void codetag_unload_module(struct module *mod) {} + +#endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ #endif /* _LINUX_CODETAG_H */ diff --git a/kernel/module/main.c b/kernel/module/main.c index 73b588fe98d4..00c16f5c5568 100644 --- 
a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1251,22 +1251,17 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) return 0; } -static void module_memory_free(struct module *mod, enum mod_mem_type type, - bool unload_codetags) +static void module_memory_free(struct module *mod, enum mod_mem_type type) { struct module_memory *mem = &mod->mem[type]; - void *ptr = mem->base; if (mem->is_rox) vfree(mem->rw_copy); - if (!unload_codetags && mod_mem_type_is_core_data(type)) - return; - - execmem_free(ptr); + execmem_free(mem->base); } -static void free_mod_mem(struct module *mod, bool unload_codetags) +static void free_mod_mem(struct module *mod) { for_each_mod_mem_type(type) { struct module_memory *mod_mem = &mod->mem[type]; @@ -1277,25 +1272,20 @@ static void free_mod_mem(struct module *mod, bool unload_codetags) /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod_mem->base, mod_mem->size); if (mod_mem->size) - module_memory_free(mod, type, unload_codetags); + module_memory_free(mod, type); } /* MOD_DATA hosts mod, so free it at last */ lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size); - module_memory_free(mod, MOD_DATA, unload_codetags); + module_memory_free(mod, MOD_DATA); } /* Free a module, remove from lists, etc. 
*/ static void free_module(struct module *mod) { - bool unload_codetags; - trace_module_free(mod); - unload_codetags = codetag_unload_module(mod); - if (!unload_codetags) - pr_warn("%s: memory allocation(s) from the module still alive, cannot unload cleanly\n", - mod->name); + codetag_unload_module(mod); mod_sysfs_teardown(mod); @@ -1338,7 +1328,7 @@ static void free_module(struct module *mod) kfree(mod->args); percpu_modfree(mod); - free_mod_mem(mod, unload_codetags); + free_mod_mem(mod); } void *__symbol_get(const char *symbol) @@ -1603,6 +1593,20 @@ static void __layout_sections(struct module *mod, struct load_info *info, bool i if (WARN_ON_ONCE(type == MOD_INVALID)) continue; + /* + * Do not allocate codetag memory as we load it into + * preallocated contiguous memory. + */ + if (codetag_needs_module_section(mod, sname, s->sh_size)) { + /* + * s->sh_entsize won't be used but populate the + * type field to avoid confusion. + */ + s->sh_entsize = ((unsigned long)(type) & SH_ENTSIZE_TYPE_MASK) + << SH_ENTSIZE_TYPE_SHIFT; + continue; + } + s->sh_entsize = module_get_offset_and_type(mod, type, s, i); pr_debug("\t%s\n", sname); } @@ -2277,6 +2281,7 @@ static int move_module(struct module *mod, struct load_info *info) int i; enum mod_mem_type t = 0; int ret = -ENOMEM; + bool codetag_section_found = false; for_each_mod_mem_type(type) { if (!mod->mem[type].size) { @@ -2288,7 +2293,7 @@ static int move_module(struct module *mod, struct load_info *info) ret = module_memory_alloc(mod, type); if (ret) { t = type; - goto out_enomem; + goto out_err; } } @@ -2297,15 +2302,37 @@ static int move_module(struct module *mod, struct load_info *info) for (i = 0; i < info->hdr->e_shnum; i++) { void *dest; Elf_Shdr *shdr = &info->sechdrs[i]; - enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; - unsigned long offset = shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK; + const char *sname; unsigned long addr; if (!(shdr->sh_flags & SHF_ALLOC)) continue; - addr = (unsigned 
long)mod->mem[type].base + offset; - dest = mod->mem[type].rw_copy + offset; + sname = info->secstrings + shdr->sh_name; + /* + * Load codetag sections separately as they might still be used + * after module unload. + */ + if (codetag_needs_module_section(mod, sname, shdr->sh_size)) { + dest = codetag_alloc_module_section(mod, sname, shdr->sh_size, + arch_mod_section_prepend(mod, i), shdr->sh_addralign); + if (WARN_ON(!dest)) { + ret = -EINVAL; + goto out_err; + } + if (IS_ERR(dest)) { + ret = PTR_ERR(dest); + goto out_err; + } + addr = (unsigned long)dest; + codetag_section_found = true; + } else { + enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; + unsigned long offset = shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK; + + addr = (unsigned long)mod->mem[type].base + offset; + dest = mod->mem[type].rw_copy + offset; + } if (shdr->sh_type != SHT_NOBITS) { /* @@ -2317,7 +2344,7 @@ static int move_module(struct module *mod, struct load_info *info) if (i == info->index.mod && (WARN_ON_ONCE(shdr->sh_size != sizeof(struct module)))) { ret = -ENOEXEC; - goto out_enomem; + goto out_err; } memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); } @@ -2333,9 +2360,12 @@ static int move_module(struct module *mod, struct load_info *info) } return 0; -out_enomem: +out_err: for (t--; t >= 0; t--) - module_memory_free(mod, t, true); + module_memory_free(mod, t); + if (codetag_section_found) + codetag_free_module_sections(mod); + return ret; } @@ -2456,6 +2486,8 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) /* Module has been copied to its final place now: return it. 
*/ mod = (void *)info->sechdrs[info->index.mod].sh_addr; kmemleak_load_module(mod, info); + codetag_module_replaced(info->mod, mod); + return mod; } @@ -2465,7 +2497,7 @@ static void module_deallocate(struct module *mod, struct load_info *info) percpu_modfree(mod); module_arch_freeing_init(mod); - free_mod_mem(mod, true); + free_mod_mem(mod); } int __weak module_finalize(const Elf_Ehdr *hdr, diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 435aa837e550..5f9cd1642d58 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include #include #include #include @@ -9,6 +10,7 @@ #include #define ALLOCINFO_FILE_NAME "allocinfo" +#define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) #ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT static bool mem_profiling_support __meminitdata = true; @@ -174,31 +176,226 @@ static void __init procfs_init(void) } } -static bool alloc_tag_module_unload(struct codetag_type *cttype, - struct codetag_module *cmod) +#ifdef CONFIG_MODULES + +static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE); +/* A dummy object used to indicate an unloaded module */ +static struct module unloaded_mod; +/* A dummy object used to indicate a module prepended area */ +static struct module prepend_mod; + +static struct alloc_tag_module_section module_tags; + +static bool needs_section_mem(struct module *mod, unsigned long size) { - struct codetag_iterator iter = codetag_get_ct_iter(cttype); - struct alloc_tag_counters counter; - bool module_unused = true; - struct alloc_tag *tag; - struct codetag *ct; + return size >= sizeof(struct alloc_tag); +} + +static struct alloc_tag *find_used_tag(struct alloc_tag *from, struct alloc_tag *to) +{ + while (from <= to) { + struct alloc_tag_counters counter; - for (ct = codetag_next_ct(&iter); ct; ct = codetag_next_ct(&iter)) { - if (iter.cmod != cmod) + counter = alloc_tag_read(from); + if (counter.bytes) + 
return from; + from++; + } + + return NULL; +} + +/* Called with mod_area_mt locked */ +static void clean_unused_module_areas_locked(void) +{ + MA_STATE(mas, &mod_area_mt, 0, module_tags.size); + struct module *val; + + mas_for_each(&mas, val, module_tags.size) { + if (val != &unloaded_mod) continue; - tag = ct_to_alloc_tag(ct); - counter = alloc_tag_read(tag); + /* Release area if all tags are unused */ + if (!find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), + (struct alloc_tag *)(module_tags.start_addr + mas.last))) + mas_erase(&mas); + } +} + +/* Called with mod_area_mt locked */ +static bool find_aligned_area(struct ma_state *mas, unsigned long section_size, + unsigned long size, unsigned int prepend, unsigned long align) +{ + bool cleanup_done = false; + +repeat: + /* Try finding exact size and hope the start is aligned */ + if (!mas_empty_area(mas, 0, section_size - 1, prepend + size)) { + if (IS_ALIGNED(mas->index + prepend, align)) + return true; + + /* Try finding larger area to align later */ + mas_reset(mas); + if (!mas_empty_area(mas, 0, section_size - 1, + size + prepend + align - 1)) + return true; + } + + /* No free area, try cleanup stale data and repeat the search once */ + if (!cleanup_done) { + clean_unused_module_areas_locked(); + cleanup_done = true; + mas_reset(mas); + goto repeat; + } + + return false; +} + +static void *reserve_module_tags(struct module *mod, unsigned long size, + unsigned int prepend, unsigned long align) +{ + unsigned long section_size = module_tags.end_addr - module_tags.start_addr; + MA_STATE(mas, &mod_area_mt, 0, section_size - 1); + unsigned long offset; + void *ret = NULL; + + /* If no tags return error */ + if (size < sizeof(struct alloc_tag)) + return ERR_PTR(-EINVAL); + + /* + * align is always power of 2, so we can use IS_ALIGNED and ALIGN. + * align 0 or 1 means no alignment, to simplify set to 1. 
+ */ + if (!align) + align = 1; + + mas_lock(&mas); + if (!find_aligned_area(&mas, section_size, size, prepend, align)) { + ret = ERR_PTR(-ENOMEM); + goto unlock; + } + + /* Mark found area as reserved */ + offset = mas.index; + offset += prepend; + offset = ALIGN(offset, align); + if (offset != mas.index) { + unsigned long pad_start = mas.index; + + mas.last = offset - 1; + mas_store(&mas, &prepend_mod); + if (mas_is_err(&mas)) { + ret = ERR_PTR(xa_err(mas.node)); + goto unlock; + } + mas.index = offset; + mas.last = offset + size - 1; + mas_store(&mas, mod); + if (mas_is_err(&mas)) { + mas.index = pad_start; + mas_erase(&mas); + ret = ERR_PTR(xa_err(mas.node)); + } + } else { + mas.last = offset + size - 1; + mas_store(&mas, mod); + if (mas_is_err(&mas)) + ret = ERR_PTR(xa_err(mas.node)); + } +unlock: + mas_unlock(&mas); + + if (IS_ERR(ret)) + return ret; - if (WARN(counter.bytes, - "%s:%u module %s func:%s has %llu allocated at module unload", - ct->filename, ct->lineno, ct->modname, ct->function, counter.bytes)) - module_unused = false; + if (module_tags.size < offset + size) + module_tags.size = offset + size; + + return (struct alloc_tag *)(module_tags.start_addr + offset); +} + +static void release_module_tags(struct module *mod, bool used) +{ + MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size); + struct alloc_tag *tag; + struct module *val; + + mas_lock(&mas); + mas_for_each_rev(&mas, val, 0) + if (val == mod) + break; + + if (!val) /* module not found */ + goto out; + + if (!used) + goto release_area; + + /* Find out if the area is used */ + tag = find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), + (struct alloc_tag *)(module_tags.start_addr + mas.last)); + if (tag) { + struct alloc_tag_counters counter = alloc_tag_read(tag); + + pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n", + tag->ct.filename, tag->ct.lineno, tag->ct.modname, + tag->ct.function, counter.bytes); + } else { + used = false; + } 
+release_area: + mas_store(&mas, used ? &unloaded_mod : NULL); + val = mas_prev_range(&mas, 0); + if (val == &prepend_mod) + mas_store(&mas, NULL); +out: + mas_unlock(&mas); +} + +static void replace_module(struct module *mod, struct module *new_mod) +{ + MA_STATE(mas, &mod_area_mt, 0, module_tags.size); + struct module *val; + + mas_lock(&mas); + mas_for_each(&mas, val, module_tags.size) { + if (val != mod) + continue; + + mas_store_gfp(&mas, new_mod, GFP_KERNEL); + break; } + mas_unlock(&mas); +} + +static int __init alloc_mod_tags_mem(void) +{ + /* Allocate space to copy allocation tags */ + module_tags.start_addr = (unsigned long)execmem_alloc(EXECMEM_MODULE_DATA, + MODULE_ALLOC_TAG_VMAP_SIZE); + if (!module_tags.start_addr) + return -ENOMEM; + + module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE; + + return 0; +} - return module_unused; +static void __init free_mod_tags_mem(void) +{ + execmem_free((void *)module_tags.start_addr); + module_tags.start_addr = 0; } +#else /* CONFIG_MODULES */ + +static inline int alloc_mod_tags_mem(void) { return 0; } +static inline void free_mod_tags_mem(void) {} + +#endif /* CONFIG_MODULES */ + static int __init setup_early_mem_profiling(char *str) { bool enable; @@ -274,14 +471,26 @@ static inline void sysctl_init(void) {} static int __init alloc_tag_init(void) { const struct codetag_type_desc desc = { - .section = "alloc_tags", - .tag_size = sizeof(struct alloc_tag), - .module_unload = alloc_tag_module_unload, + .section = ALLOC_TAG_SECTION_NAME, + .tag_size = sizeof(struct alloc_tag), +#ifdef CONFIG_MODULES + .needs_section_mem = needs_section_mem, + .alloc_section_mem = reserve_module_tags, + .free_section_mem = release_module_tags, + .module_replaced = replace_module, +#endif }; + int res; + + res = alloc_mod_tags_mem(); + if (res) + return res; alloc_tag_cttype = codetag_register_type(&desc); - if (IS_ERR(alloc_tag_cttype)) + if (IS_ERR(alloc_tag_cttype)) { + free_mod_tags_mem(); return 
PTR_ERR(alloc_tag_cttype); + } sysctl_init(); procfs_init(); diff --git a/lib/codetag.c b/lib/codetag.c index d1fbbb7c2ec3..7455b966cae4 100644 --- a/lib/codetag.c +++ b/lib/codetag.c @@ -207,6 +207,94 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod) } #ifdef CONFIG_MODULES +#define CODETAG_SECTION_PREFIX ".codetag." + +/* Some codetag types need a separate module section */ +bool codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size) +{ + const char *type_name; + struct codetag_type *cttype; + bool ret = false; + + if (strncmp(name, CODETAG_SECTION_PREFIX, strlen(CODETAG_SECTION_PREFIX))) + return false; + + type_name = name + strlen(CODETAG_SECTION_PREFIX); + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (strcmp(type_name, cttype->desc.section) == 0) { + if (!cttype->desc.needs_section_mem) + break; + + down_write(&cttype->mod_lock); + ret = cttype->desc.needs_section_mem(mod, size); + up_write(&cttype->mod_lock); + break; + } + } + mutex_unlock(&codetag_lock); + + return ret; +} + +void *codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align) +{ + const char *type_name = name + strlen(CODETAG_SECTION_PREFIX); + struct codetag_type *cttype; + void *ret = ERR_PTR(-EINVAL); + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (strcmp(type_name, cttype->desc.section) == 0) { + if (WARN_ON(!cttype->desc.alloc_section_mem)) + break; + + down_write(&cttype->mod_lock); + ret = cttype->desc.alloc_section_mem(mod, size, prepend, align); + up_write(&cttype->mod_lock); + break; + } + } + mutex_unlock(&codetag_lock); + + return ret; +} + +void codetag_free_module_sections(struct module *mod) +{ + struct codetag_type *cttype; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (!cttype->desc.free_section_mem) + 
continue; + + down_write(&cttype->mod_lock); + cttype->desc.free_section_mem(mod, false); + up_write(&cttype->mod_lock); + } + mutex_unlock(&codetag_lock); +} + +void codetag_module_replaced(struct module *mod, struct module *new_mod) +{ + struct codetag_type *cttype; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (!cttype->desc.module_replaced) + continue; + + down_write(&cttype->mod_lock); + cttype->desc.module_replaced(mod, new_mod); + up_write(&cttype->mod_lock); + } + mutex_unlock(&codetag_lock); +} + void codetag_load_module(struct module *mod) { struct codetag_type *cttype; @@ -220,13 +308,12 @@ void codetag_load_module(struct module *mod) mutex_unlock(&codetag_lock); } -bool codetag_unload_module(struct module *mod) +void codetag_unload_module(struct module *mod) { struct codetag_type *cttype; - bool unload_ok = true; if (!mod) - return true; + return; /* await any module's kfree_rcu() operations to complete */ kvfree_rcu_barrier(); @@ -246,18 +333,17 @@ bool codetag_unload_module(struct module *mod) } if (found) { if (cttype->desc.module_unload) - if (!cttype->desc.module_unload(cttype, cmod)) - unload_ok = false; + cttype->desc.module_unload(cttype, cmod); cttype->count -= range_size(cttype, &cmod->range); idr_remove(&cttype->mod_idr, mod_id); kfree(cmod); } up_write(&cttype->mod_lock); + if (found && cttype->desc.free_section_mem) + cttype->desc.free_section_mem(mod, true); } mutex_unlock(&codetag_lock); - - return unload_ok; } #endif /* CONFIG_MODULES */ diff --git a/scripts/module.lds.S b/scripts/module.lds.S index 3f43edef813c..711c6e029936 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -50,7 +50,7 @@ SECTIONS { .data : { *(.data .data.[0-9a-zA-Z_]*) *(.data..L*) - CODETAG_SECTIONS() + MOD_CODETAG_SECTIONS() } .rodata : { @@ -59,9 +59,10 @@ SECTIONS { } #else .data : { - CODETAG_SECTIONS() + MOD_CODETAG_SECTIONS() } #endif + MOD_SEPARATE_CODETAG_SECTIONS() } /* bring in arch-specific sections 
*/ -- cgit v1.2.3