diff options
Diffstat (limited to 'kernel/sys.c')
| -rw-r--r-- | kernel/sys.c | 180 |
1 files changed, 112 insertions, 68 deletions
diff --git a/kernel/sys.c b/kernel/sys.c index adc0de0aa364..8b58eece4e58 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -181,6 +181,35 @@ int fs_overflowgid = DEFAULT_FS_OVERFLOWGID; EXPORT_SYMBOL(fs_overflowuid); EXPORT_SYMBOL(fs_overflowgid); +static const struct ctl_table overflow_sysctl_table[] = { + { + .procname = "overflowuid", + .data = &overflowuid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_MAXOLDUID, + }, + { + .procname = "overflowgid", + .data = &overflowgid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_MAXOLDUID, + }, +}; + +static int __init init_overflow_sysctl(void) +{ + register_sysctl_init("kernel", overflow_sysctl_table); + return 0; +} + +postcore_initcall(init_overflow_sysctl); + /* * Returns true if current's euid is same as p's uid or euid, * or has CAP_SYS_NICE to p's user_ns. @@ -1705,6 +1734,7 @@ SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, struct rlimit old, new; struct task_struct *tsk; unsigned int checkflags = 0; + bool need_tasklist; int ret; if (old_rlim) @@ -1731,8 +1761,25 @@ SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, get_task_struct(tsk); rcu_read_unlock(); - ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, - old_rlim ? &old : NULL); + need_tasklist = !same_thread_group(tsk, current); + if (need_tasklist) { + /* + * Ensure we can't race with group exit or de_thread(), + * so tsk->group_leader can't be freed or changed until + * read_unlock(tasklist_lock) below. + */ + read_lock(&tasklist_lock); + if (!pid_alive(tsk)) + ret = -ESRCH; + } + + if (!ret) { + ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, + old_rlim ? &old : NULL); + } + + if (need_tasklist) + read_unlock(&tasklist_lock); if (!ret && old_rlim) { rlim_to_rlim64(&old, &old64); @@ -2343,54 +2390,14 @@ int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long st #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE) -#ifdef CONFIG_ANON_VMA_NAME - -#define ANON_VMA_NAME_MAX_LEN 80 -#define ANON_VMA_NAME_INVALID_CHARS "\\`$[]" - -static inline bool is_valid_name_char(char ch) -{ - /* printable ascii characters, excluding ANON_VMA_NAME_INVALID_CHARS */ - return ch > 0x1f && ch < 0x7f && - !strchr(ANON_VMA_NAME_INVALID_CHARS, ch); -} - static int prctl_set_vma(unsigned long opt, unsigned long addr, unsigned long size, unsigned long arg) { - struct mm_struct *mm = current->mm; - const char __user *uname; - struct anon_vma_name *anon_name = NULL; int error; switch (opt) { case PR_SET_VMA_ANON_NAME: - uname = (const char __user *)arg; - if (uname) { - char *name, *pch; - - name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN); - if (IS_ERR(name)) - return PTR_ERR(name); - - for (pch = name; *pch != '\0'; pch++) { - if (!is_valid_name_char(*pch)) { - kfree(name); - return -EINVAL; - } - } - /* anon_vma has its own copy */ - anon_name = anon_vma_name_alloc(name); - kfree(name); - if (!anon_name) - return -ENOMEM; - - } - - mmap_write_lock(mm); - error = madvise_set_anon_name(mm, addr, size, anon_name); - mmap_write_unlock(mm); - anon_vma_name_put(anon_name); + error = set_anon_vma_name(addr, size, (const char __user *)arg); break; default: error = -EINVAL; @@ -2399,21 +2406,13 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr, return error; } -#else /* CONFIG_ANON_VMA_NAME */ -static int prctl_set_vma(unsigned long opt, unsigned long start, - unsigned long size, unsigned long arg) -{ - return -EINVAL; -} -#endif /* CONFIG_ANON_VMA_NAME */ - static inline unsigned long get_current_mdwe(void) { unsigned long ret = 0; - if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) + if (mm_flags_test(MMF_HAS_MDWE, current->mm)) ret |= PR_MDWE_REFUSE_EXEC_GAIN; - if (test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags)) + if (mm_flags_test(MMF_HAS_MDWE_NO_INHERIT, current->mm)) ret |= PR_MDWE_NO_INHERIT; return ret; @@ -2446,9 +2445,9 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3, return -EPERM; /* Cannot unset the flags */ if (bits & PR_MDWE_NO_INHERIT) - set_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags); + mm_flags_set(MMF_HAS_MDWE_NO_INHERIT, current->mm); if (bits & PR_MDWE_REFUSE_EXEC_GAIN) - set_bit(MMF_HAS_MDWE, ¤t->mm->flags); + mm_flags_set(MMF_HAS_MDWE, current->mm); return 0; } @@ -2471,6 +2470,51 @@ static int prctl_get_auxv(void __user *addr, unsigned long len) return sizeof(mm->saved_auxv); } +static int prctl_get_thp_disable(unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + struct mm_struct *mm = current->mm; + + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; + + /* If disabled, we return "1 | flags", otherwise 0. */ + if (mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm)) + return 1; + else if (mm_flags_test(MMF_DISABLE_THP_EXCEPT_ADVISED, mm)) + return 1 | PR_THP_DISABLE_EXCEPT_ADVISED; + return 0; +} + +static int prctl_set_thp_disable(bool thp_disable, unsigned long flags, + unsigned long arg4, unsigned long arg5) +{ + struct mm_struct *mm = current->mm; + + if (arg4 || arg5) + return -EINVAL; + + /* Flags are only allowed when disabling. */ + if ((!thp_disable && flags) || (flags & ~PR_THP_DISABLE_EXCEPT_ADVISED)) + return -EINVAL; + if (mmap_write_lock_killable(current->mm)) + return -EINTR; + if (thp_disable) { + if (flags & PR_THP_DISABLE_EXCEPT_ADVISED) { + mm_flags_clear(MMF_DISABLE_THP_COMPLETELY, mm); + mm_flags_set(MMF_DISABLE_THP_EXCEPT_ADVISED, mm); + } else { + mm_flags_set(MMF_DISABLE_THP_COMPLETELY, mm); + mm_flags_clear(MMF_DISABLE_THP_EXCEPT_ADVISED, mm); + } + } else { + mm_flags_clear(MMF_DISABLE_THP_COMPLETELY, mm); + mm_flags_clear(MMF_DISABLE_THP_EXCEPT_ADVISED, mm); + } + mmap_write_unlock(current->mm); + return 0; +} + SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { @@ -2489,7 +2533,17 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = -EINVAL; break; } + /* + * Ensure that either: + * + * 1. Subsequent getppid() calls reflect the parent process having died. + * 2. forget_original_parent() will send the new me->pdeath_signal. + * + * Also prevent the read of me->pdeath_signal from being a data race. + */ + read_lock(&tasklist_lock); me->pdeath_signal = arg2; + read_unlock(&tasklist_lock); break; case PR_GET_PDEATHSIG: error = put_user(me->pdeath_signal, (int __user *)arg2); @@ -2644,20 +2698,10 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; return task_no_new_privs(current) ? 1 : 0; case PR_GET_THP_DISABLE: - if (arg2 || arg3 || arg4 || arg5) - return -EINVAL; - error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags); + error = prctl_get_thp_disable(arg2, arg3, arg4, arg5); break; case PR_SET_THP_DISABLE: - if (arg3 || arg4 || arg5) - return -EINVAL; - if (mmap_write_lock_killable(me->mm)) - return -EINTR; - if (arg2) - set_bit(MMF_DISABLE_THP, &me->mm->flags); - else - clear_bit(MMF_DISABLE_THP, &me->mm->flags); - mmap_write_unlock(me->mm); + error = prctl_set_thp_disable(arg2, arg3, arg4, arg5); break; case PR_MPX_ENABLE_MANAGEMENT: case PR_MPX_DISABLE_MANAGEMENT: @@ -2789,7 +2833,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, if (arg2 || arg3 || arg4 || arg5) return -EINVAL; - error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags); + error = !!mm_flags_test(MMF_VM_MERGE_ANY, me->mm); break; #endif case PR_RISCV_V_SET_CONTROL: |
