From 0e25498f8cd43c1b5aa327f373dd094e9a006da7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 28 Jun 2021 14:52:01 -0500
Subject: exit: Add and use make_task_dead.

There are two big uses of do_exit.  The first is it's design use to be
the guts of the exit(2) system call.  The second use is to terminate
a task after something catastrophic has happened like a NULL pointer
in kernel code.

Add a function make_task_dead that is initialy exactly the same as
do_exit to cover the cases where do_exit is called to handle
catastrophic failure.  In time this can probably be reduced to just a
light wrapper around do_task_dead. For now keep it exactly the same so
that there will be no behavioral differences introducing this new
concept.

Replace all of the uses of do_exit that use it for catastraphic
task cleanup with make_task_dead to make it clear what the code
is doing.

As part of this rename rewind_stack_do_exit
rewind_stack_and_make_dead.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/exit.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'kernel/exit.c')

diff --git a/kernel/exit.c b/kernel/exit.c
index f702a6a63686..bfa513c5b227 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -884,6 +884,15 @@ void __noreturn do_exit(long code)
 }
 EXPORT_SYMBOL_GPL(do_exit);
 
+void __noreturn make_task_dead(int signr)
+{
+	/*
+	 * Take the task off the cpu after something catastrophic has
+	 * happened.
+	 */
+	do_exit(signr);
+}
+
 void complete_and_exit(struct completion *comp, long code)
 {
 	if (comp)
-- 
cgit v1.3


From 05ea0424f0e21c0ef9b47c89826e7c22ae137975 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 22 Nov 2021 09:33:00 -0600
Subject: exit: Move oops specific logic from do_exit into make_task_dead

The beginning of do_exit has become cluttered and difficult to read as
it is filled with checks to handle things that can only happen when
the kernel is operating improperly.

Now that we have a dedicated function for cleaning up a task when the
kernel is operating improperly move the checks there.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/exit.c       | 78 ++++++++++++++++++++++++++---------------------------
 kernel/futex/core.c |  2 +-
 kernel/kexec_core.c |  2 +-
 3 files changed, 41 insertions(+), 41 deletions(-)

(limited to 'kernel/exit.c')

diff --git a/kernel/exit.c b/kernel/exit.c
index bfa513c5b227..d0ec6f6b41cb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -735,36 +735,8 @@ void __noreturn do_exit(long code)
 	struct task_struct *tsk = current;
 	int group_dead;
 
-	/*
-	 * We can get here from a kernel oops, sometimes with preemption off.
-	 * Start by checking for critical errors.
-	 * Then fix up important state like USER_DS and preemption.
-	 * Then do everything else.
-	 */
-
 	WARN_ON(blk_needs_flush_plug(tsk));
 
-	if (unlikely(in_interrupt()))
-		panic("Aiee, killing interrupt handler!");
-	if (unlikely(!tsk->pid))
-		panic("Attempted to kill the idle task!");
-
-	/*
-	 * If do_exit is called because this processes oopsed, it's possible
-	 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
-	 * continuing. Amongst other possible reasons, this is to prevent
-	 * mm_release()->clear_child_tid() from writing to a user-controlled
-	 * kernel address.
-	 */
-	force_uaccess_begin();
-
-	if (unlikely(in_atomic())) {
-		pr_info("note: %s[%d] exited with preempt_count %d\n",
-			current->comm, task_pid_nr(current),
-			preempt_count());
-		preempt_count_set(PREEMPT_ENABLED);
-	}
-
 	profile_task_exit(tsk);
 	kcov_task_exit(tsk);
 
@@ -773,17 +745,6 @@ void __noreturn do_exit(long code)
 
 	validate_creds_for_do_exit(tsk);
 
-	/*
-	 * We're taking recursive faults here in do_exit. Safest is to just
-	 * leave this task alone and wait for reboot.
-	 */
-	if (unlikely(tsk->flags & PF_EXITING)) {
-		pr_alert("Fixing recursive fault but reboot is needed!\n");
-		futex_exit_recursive(tsk);
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule();
-	}
-
 	io_uring_files_cancel();
 	exit_signals(tsk);  /* sets PF_EXITING */
 
@@ -889,7 +850,46 @@ void __noreturn make_task_dead(int signr)
 	/*
 	 * Take the task off the cpu after something catastrophic has
 	 * happened.
+	 *
+	 * We can get here from a kernel oops, sometimes with preemption off.
+	 * Start by checking for critical errors.
+	 * Then fix up important state like USER_DS and preemption.
+	 * Then do everything else.
 	 */
+	struct task_struct *tsk = current;
+
+	if (unlikely(in_interrupt()))
+		panic("Aiee, killing interrupt handler!");
+	if (unlikely(!tsk->pid))
+		panic("Attempted to kill the idle task!");
+
+	/*
+	 * If make_task_dead is called because this processes oopsed, it's possible
+	 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
+	 * continuing. Amongst other possible reasons, this is to prevent
+	 * mm_release()->clear_child_tid() from writing to a user-controlled
+	 * kernel address.
+	 */
+	force_uaccess_begin();
+
+	if (unlikely(in_atomic())) {
+		pr_info("note: %s[%d] exited with preempt_count %d\n",
+			current->comm, task_pid_nr(current),
+			preempt_count());
+		preempt_count_set(PREEMPT_ENABLED);
+	}
+
+	/*
+	 * We're taking recursive faults here in make_task_dead. Safest is to just
+	 * leave this task alone and wait for reboot.
+	 */
+	if (unlikely(tsk->flags & PF_EXITING)) {
+		pr_alert("Fixing recursive fault but reboot is needed!\n");
+		futex_exit_recursive(tsk);
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule();
+	}
+
 	do_exit(signr);
 }
 
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 25d8a88b32e5..39a1522865b5 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -1044,7 +1044,7 @@ static void futex_cleanup(struct task_struct *tsk)
  * actually finished the futex cleanup. The worst case for this is that the
  * waiter runs through the wait loop until the state becomes visible.
  *
- * This is called from the recursive fault handling path in do_exit().
+ * This is called from the recursive fault handling path in make_task_dead().
  *
  * This is best effort. Either the futex exit code has run already or
  * not. If the OWNER_DIED bit has been set on the futex then the waiter can
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 5a5d192a89ac..68480f731192 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -81,7 +81,7 @@ int kexec_should_crash(struct task_struct *p)
 	if (crash_kexec_post_notifiers)
 		return 0;
 	/*
-	 * There are 4 panic() calls in do_exit() path, each of which
+	 * There are 4 panic() calls in make_task_dead() path, each of which
 	 * corresponds to each of these 4 conditions.
 	 */
 	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
-- 
cgit v1.3


From 7f80a2fd7db9a55894fd841915236aca611291b5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 22 Nov 2021 09:51:03 -0600
Subject: exit: Stop poorly open coding do_task_dead in make_task_dead

When the kernel detects it is oops or otherwise force killing a task
while it exits the code poorly attempts to permanently stop the task
from scheduling.

I say poorly because it is possible for a task in TASK_UINTERRUPTIBLE
to be woken up.

As it makes no sense for the task to continue call do_task_dead
instead which actually does the work and permanently removes the task
from the scheduler.  Guaranteeing the task will never be woken
up again.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/exit.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel/exit.c')

diff --git a/kernel/exit.c b/kernel/exit.c
index d0ec6f6b41cb..f975cd8a2ed8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -886,8 +886,7 @@ void __noreturn make_task_dead(int signr)
 	if (unlikely(tsk->flags & PF_EXITING)) {
 		pr_alert("Fixing recursive fault but reboot is needed!\n");
 		futex_exit_recursive(tsk);
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule();
+		do_task_dead();
 	}
 
 	do_exit(signr);
-- 
cgit v1.3


From eb55e716ac1aa0de13ef5abbf1479d995582d967 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 22 Nov 2021 10:01:59 -0600
Subject: exit: Stop exporting do_exit

Now that there are no more modular uses of do_exit remove the EXPORT_SYMBOL.

Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/exit.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel/exit.c')

diff --git a/kernel/exit.c b/kernel/exit.c
index f975cd8a2ed8..57afac845a0a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -843,7 +843,6 @@ void __noreturn do_exit(long code)
 	lockdep_free_task(tsk);
 	do_task_dead();
 }
-EXPORT_SYMBOL_GPL(do_exit);
 
 void __noreturn make_task_dead(int signr)
 {
-- 
cgit v1.3


From cead18552660702a4a46f58e65188fe5f36e9dfe Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 22 Nov 2021 11:15:19 -0600
Subject: exit: Rename complete_and_exit to kthread_complete_and_exit

Update complete_and_exit to call kthread_exit instead of do_exit.

Change the name to reflect this change in functionality.  All of the
users of complete_and_exit are causing the current kthread to exit so
this change makes it clear what is happening.

Move the implementation of kthread_complete_and_exit from
kernel/exit.c to to kernel/kthread.c.  As this function is kthread
specific it makes most sense to live with the kthread functions.

There are no functional change.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 drivers/net/wireless/rsi/rsi_91x_coex.c      |  2 +-
 drivers/net/wireless/rsi/rsi_91x_main.c      |  2 +-
 drivers/net/wireless/rsi/rsi_91x_sdio_ops.c  |  2 +-
 drivers/net/wireless/rsi/rsi_91x_usb_ops.c   |  2 +-
 drivers/pnp/pnpbios/core.c                   |  6 +++---
 drivers/staging/rts5208/rtsx.c               | 16 ++++++++--------
 drivers/usb/atm/usbatm.c                     |  2 +-
 drivers/usb/gadget/function/f_mass_storage.c |  2 +-
 fs/jffs2/background.c                        |  2 +-
 include/linux/kernel.h                       |  1 -
 include/linux/kthread.h                      |  1 +
 kernel/exit.c                                |  9 ---------
 kernel/kthread.c                             | 21 +++++++++++++++++++++
 lib/kunit/try-catch.c                        |  4 ++--
 tools/objtool/check.c                        |  2 +-
 15 files changed, 43 insertions(+), 31 deletions(-)

(limited to 'kernel/exit.c')

diff --git a/drivers/net/wireless/rsi/rsi_91x_coex.c b/drivers/net/wireless/rsi/rsi_91x_coex.c
index a0c5d02ae88c..8a3d86897ea8 100644
--- a/drivers/net/wireless/rsi/rsi_91x_coex.c
+++ b/drivers/net/wireless/rsi/rsi_91x_coex.c
@@ -63,7 +63,7 @@ static void rsi_coex_scheduler_thread(struct rsi_common *common)
 		rsi_coex_sched_tx_pkts(coex_cb);
 	} while (atomic_read(&coex_cb->coex_tx_thread.thread_done) == 0);
 
-	complete_and_exit(&coex_cb->coex_tx_thread.completion, 0);
+	kthread_complete_and_exit(&coex_cb->coex_tx_thread.completion, 0);
 }
 
 int rsi_coex_recv_pkt(struct rsi_common *common, u8 *msg)
diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c
index f1bf71e6c608..c7f5cec5e446 100644
--- a/drivers/net/wireless/rsi/rsi_91x_main.c
+++ b/drivers/net/wireless/rsi/rsi_91x_main.c
@@ -260,7 +260,7 @@ static void rsi_tx_scheduler_thread(struct rsi_common *common)
 		if (common->init_done)
 			rsi_core_qos_processor(common);
 	} while (atomic_read(&common->tx_thread.thread_done) == 0);
-	complete_and_exit(&common->tx_thread.completion, 0);
+	kthread_complete_and_exit(&common->tx_thread.completion, 0);
 }
 
 #ifdef CONFIG_RSI_COEX
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
index 8ace1874e5cb..b2b47a0abcbf 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
@@ -75,7 +75,7 @@ void rsi_sdio_rx_thread(struct rsi_common *common)
 
 	rsi_dbg(INFO_ZONE, "%s: Terminated SDIO RX thread\n", __func__);
 	atomic_inc(&sdev->rx_thread.thread_done);
-	complete_and_exit(&sdev->rx_thread.completion, 0);
+	kthread_complete_and_exit(&sdev->rx_thread.completion, 0);
 }
 
 /**
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
index 4ffcdde1acb1..5130b0e72adc 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
@@ -56,6 +56,6 @@ void rsi_usb_rx_thread(struct rsi_common *common)
 out:
 	rsi_dbg(INFO_ZONE, "%s: Terminated thread\n", __func__);
 	skb_queue_purge(&dev->rx_q);
-	complete_and_exit(&dev->rx_thread.completion, 0);
+	kthread_complete_and_exit(&dev->rx_thread.completion, 0);
 }
 
diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index 669ef4700c1a..f7e86ae9f72f 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c
@@ -160,7 +160,7 @@ static int pnp_dock_thread(void *unused)
 			 * No dock to manage
 			 */
 		case PNP_FUNCTION_NOT_SUPPORTED:
-			complete_and_exit(&unload_sem, 0);
+			kthread_complete_and_exit(&unload_sem, 0);
 		case PNP_SYSTEM_NOT_DOCKED:
 			d = 0;
 			break;
@@ -170,7 +170,7 @@ static int pnp_dock_thread(void *unused)
 		default:
 			pnpbios_print_status("pnp_dock_thread", status);
 			printk(KERN_WARNING "PnPBIOS: disabling dock monitoring.\n");
-			complete_and_exit(&unload_sem, 0);
+			kthread_complete_and_exit(&unload_sem, 0);
 		}
 		if (d != docked) {
 			if (pnp_dock_event(d, &now) == 0) {
@@ -183,7 +183,7 @@ static int pnp_dock_thread(void *unused)
 			}
 		}
 	}
-	complete_and_exit(&unload_sem, 0);
+	kthread_complete_and_exit(&unload_sem, 0);
 }
 
 static int pnpbios_get_resources(struct pnp_dev *dev)
diff --git a/drivers/staging/rts5208/rtsx.c b/drivers/staging/rts5208/rtsx.c
index 91fcf85e150a..5a58dac76c88 100644
--- a/drivers/staging/rts5208/rtsx.c
+++ b/drivers/staging/rts5208/rtsx.c
@@ -450,13 +450,13 @@ skip_for_abort:
 	 * after the down() -- that's necessary for the thread-shutdown
 	 * case.
 	 *
-	 * complete_and_exit() goes even further than this -- it is safe in
-	 * the case that the thread of the caller is going away (not just
-	 * the structure) -- this is necessary for the module-remove case.
-	 * This is important in preemption kernels, which transfer the flow
-	 * of execution immediately upon a complete().
+	 * kthread_complete_and_exit() goes even further than this --
+	 * it is safe in the case that the thread of the caller is going away
+	 * (not just the structure) -- this is necessary for the module-remove
+	 * case.  This is important in preemption kernels, which transfer the
+	 * flow of execution immediately upon a complete().
 	 */
-	complete_and_exit(&dev->control_exit, 0);
+	kthread_complete_and_exit(&dev->control_exit, 0);
 }
 
 static int rtsx_polling_thread(void *__dev)
@@ -501,7 +501,7 @@ static int rtsx_polling_thread(void *__dev)
 		mutex_unlock(&dev->dev_mutex);
 	}
 
-	complete_and_exit(&dev->polling_exit, 0);
+	kthread_complete_and_exit(&dev->polling_exit, 0);
 }
 
 /*
@@ -682,7 +682,7 @@ static int rtsx_scan_thread(void *__dev)
 		/* Should we unbind if no devices were detected? */
 	}
 
-	complete_and_exit(&dev->scanning_done, 0);
+	kthread_complete_and_exit(&dev->scanning_done, 0);
 }
 
 static void rtsx_init_options(struct rtsx_chip *chip)
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index da17be1ef64e..e3a49d837609 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -969,7 +969,7 @@ static int usbatm_do_heavy_init(void *arg)
 	instance->thread = NULL;
 	mutex_unlock(&instance->serialize);
 
-	complete_and_exit(&instance->thread_exited, ret);
+	kthread_complete_and_exit(&instance->thread_exited, ret);
 }
 
 static int usbatm_heavy_init(struct usbatm_data *instance)
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index 752439690fda..46dd11dcb3a8 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -2547,7 +2547,7 @@ static int fsg_main_thread(void *common_)
 	up_write(&common->filesem);
 
 	/* Let fsg_unbind() know the thread has exited */
-	complete_and_exit(&common->thread_notifier, 0);
+	kthread_complete_and_exit(&common->thread_notifier, 0);
 }
 
 
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 2b4d5013dc5d..6da92ecaf66d 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -161,5 +161,5 @@ static int jffs2_garbage_collect_thread(void *_c)
 	spin_lock(&c->erase_completion_lock);
 	c->gc_task = NULL;
 	spin_unlock(&c->erase_completion_lock);
-	complete_and_exit(&c->gc_thread_exit, 0);
+	kthread_complete_and_exit(&c->gc_thread_exit, 0);
 }
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 77755ac3e189..055eb203c00e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -187,7 +187,6 @@ static inline void might_fault(void) { }
 #endif
 
 void do_exit(long error_code) __noreturn;
-void complete_and_exit(struct completion *, long) __noreturn;
 
 extern int num_to_str(char *buf, int size,
 		      unsigned long long num, unsigned int width);
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 22c43d419687..d86a7e3b9a52 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -71,6 +71,7 @@ int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
 void kthread_exit(long result) __noreturn;
+void kthread_complete_and_exit(struct completion *, long) __noreturn;
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
diff --git a/kernel/exit.c b/kernel/exit.c
index 57afac845a0a..6c4b04531f17 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -891,15 +891,6 @@ void __noreturn make_task_dead(int signr)
 	do_exit(signr);
 }
 
-void complete_and_exit(struct completion *comp, long code)
-{
-	if (comp)
-		complete(comp);
-
-	do_exit(code);
-}
-EXPORT_SYMBOL(complete_and_exit);
-
 SYSCALL_DEFINE1(exit, int, error_code)
 {
 	do_exit((error_code&0xff)<<8);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 77b7c3f23f18..4388d6694a7f 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -283,6 +283,27 @@ void __noreturn kthread_exit(long result)
 	do_exit(result);
 }
 
+/**
+ * kthread_complete_and exit - Exit the current kthread.
+ * @comp: Completion to complete
+ * @code: The integer value to return to kthread_stop().
+ *
+ * If present complete @comp and the reuturn code to kthread_stop().
+ *
+ * A kernel thread whose module may be removed after the completion of
+ * @comp can use this function exit safely.
+ *
+ * Does not return.
+ */
+void __noreturn kthread_complete_and_exit(struct completion *comp, long code)
+{
+	if (comp)
+		complete(comp);
+
+	kthread_exit(code);
+}
+EXPORT_SYMBOL(kthread_complete_and_exit);
+
 static int kthread(void *_create)
 {
 	static const struct sched_param param = { .sched_priority = 0 };
diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c
index 0dd434e40487..be38a2c5ecc2 100644
--- a/lib/kunit/try-catch.c
+++ b/lib/kunit/try-catch.c
@@ -17,7 +17,7 @@
 void __noreturn kunit_try_catch_throw(struct kunit_try_catch *try_catch)
 {
 	try_catch->try_result = -EFAULT;
-	complete_and_exit(try_catch->try_completion, -EFAULT);
+	kthread_complete_and_exit(try_catch->try_completion, -EFAULT);
 }
 EXPORT_SYMBOL_GPL(kunit_try_catch_throw);
 
@@ -27,7 +27,7 @@ static int kunit_generic_run_threadfn_adapter(void *data)
 
 	try_catch->try(try_catch->context);
 
-	complete_and_exit(try_catch->try_completion, 0);
+	kthread_complete_and_exit(try_catch->try_completion, 0);
 }
 
 static unsigned long kunit_test_timeout(void)
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 120e9598c11a..282273a1ffa5 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -171,7 +171,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
 		"kthread_exit",
 		"make_task_dead",
 		"__module_put_and_kthread_exit",
-		"complete_and_exit",
+		"kthread_complete_and_exit",
 		"__reiserfs_panic",
 		"lbug_with_loc",
 		"fortify_panic",
-- 
cgit v1.3


From 912616f142bfeb1dc41f40dbe7ce38331886a94a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 5 Jan 2022 16:30:21 -0600
Subject: exit: Guarantee make_task_dead leaks the tsk when calling
 do_task_exit

Change the task state to EXIT_DEAD and take an extra rcu_refernce
to guarantee the task will not be reaped and that it will not be
freed.

Link: https://lkml.kernel.org/r/YdUzjrLAlRiNLQp2@zeniv-ca.linux.org.uk
Pointed-out-by: Al Viro <viro@zeniv.linux.org.uk>
Fixes: 7f80a2fd7db9 ("exit: Stop poorly open coding do_task_dead in make_task_dead")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/exit.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel/exit.c')

diff --git a/kernel/exit.c b/kernel/exit.c
index 6c4b04531f17..db4eeb7fc680 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -885,6 +885,8 @@ void __noreturn make_task_dead(int signr)
 	if (unlikely(tsk->flags & PF_EXITING)) {
 		pr_alert("Fixing recursive fault but reboot is needed!\n");
 		futex_exit_recursive(tsk);
+		tsk->exit_state = EXIT_DEAD;
+		refcount_inc(&tsk->rcu_users);
 		do_task_dead();
 	}
 
-- 
cgit v1.3


From de77c3a5b95c95a4915142071643d94e3e1ada35 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 7 Jan 2022 12:18:12 -0600
Subject: exit: Move force_uaccess back into do_exit

With kernel threads on architectures that still have set_fs/get_fs
running as KERNEL_DS moving force_uaccess_begin does not appear safe.
Calling force_uaccess_begin is a noop on anything people care about.

Update the comment to explain why this code while looking like an
obvious candidate for moving to make_task_dead probably needs to
remain in do_exit until set_fs/get_fs are entirely removed from the
kernel.

Fixes: 05ea0424f0e2 ("exit: Move oops specific logic from do_exit into make_task_dead")
Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Link: https://lkml.kernel.org/r/YdUxGKRcSiDy8jGg@zeniv-ca.linux.org.uk
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/exit.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'kernel/exit.c')

diff --git a/kernel/exit.c b/kernel/exit.c
index db4eeb7fc680..fc0726cb22db 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -737,6 +737,20 @@ void __noreturn do_exit(long code)
 
 	WARN_ON(blk_needs_flush_plug(tsk));
 
+	/*
+	 * If do_dead is called because this processes oopsed, it's possible
+	 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
+	 * continuing. Amongst other possible reasons, this is to prevent
+	 * mm_release()->clear_child_tid() from writing to a user-controlled
+	 * kernel address.
+	 *
+	 * On uptodate architectures force_uaccess_begin is a noop.  On
+	 * architectures that still have set_fs/get_fs in addition to handling
+	 * oopses handles kernel threads that run as set_fs(KERNEL_DS) by
+	 * default.
+	 */
+	force_uaccess_begin();
+
 	profile_task_exit(tsk);
 	kcov_task_exit(tsk);
 
@@ -862,15 +876,6 @@ void __noreturn make_task_dead(int signr)
 	if (unlikely(!tsk->pid))
 		panic("Attempted to kill the idle task!");
 
-	/*
-	 * If make_task_dead is called because this processes oopsed, it's possible
-	 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
-	 * continuing. Amongst other possible reasons, this is to prevent
-	 * mm_release()->clear_child_tid() from writing to a user-controlled
-	 * kernel address.
-	 */
-	force_uaccess_begin();
-
 	if (unlikely(in_atomic())) {
 		pr_info("note: %s[%d] exited with preempt_count %d\n",
 			current->comm, task_pid_nr(current),
-- 
cgit v1.3


From 60700e38fb68e800607ca7a027060d5419fc5798 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 6 Jun 2021 13:47:53 -0500
Subject: signal: Rename group_exit_task group_exec_task

The only remaining user of group_exit_task is exec.  Rename the field
so that it is clear which part of the code uses it.

Update the comment above the definition of group_exec_task to document
how it is currently used.

Link: https://lkml.kernel.org/r/20211213225350.27481-7-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/exec.c                    |  8 ++++----
 include/linux/sched/signal.h | 12 ++++--------
 kernel/exit.c                |  4 ++--
 3 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'kernel/exit.c')

diff --git a/fs/exec.c b/fs/exec.c
index 59cac7c18178..9d2925811011 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1054,7 +1054,7 @@ static int de_thread(struct task_struct *tsk)
 		return -EAGAIN;
 	}
 
-	sig->group_exit_task = tsk;
+	sig->group_exec_task = tsk;
 	sig->notify_count = zap_other_threads(tsk);
 	if (!thread_group_leader(tsk))
 		sig->notify_count--;
@@ -1082,7 +1082,7 @@ static int de_thread(struct task_struct *tsk)
 			write_lock_irq(&tasklist_lock);
 			/*
 			 * Do this under tasklist_lock to ensure that
-			 * exit_notify() can't miss ->group_exit_task
+			 * exit_notify() can't miss ->group_exec_task
 			 */
 			sig->notify_count = -1;
 			if (likely(leader->exit_state))
@@ -1149,7 +1149,7 @@ static int de_thread(struct task_struct *tsk)
 		release_task(leader);
 	}
 
-	sig->group_exit_task = NULL;
+	sig->group_exec_task = NULL;
 	sig->notify_count = 0;
 
 no_thread_group:
@@ -1162,7 +1162,7 @@ no_thread_group:
 killed:
 	/* protects against exit_notify() and __exit_signal() */
 	read_lock(&tasklist_lock);
-	sig->group_exit_task = NULL;
+	sig->group_exec_task = NULL;
 	sig->notify_count = 0;
 	read_unlock(&tasklist_lock);
 	return -EAGAIN;
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index ecc10e148799..d3248aba5183 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -109,13 +109,9 @@ struct signal_struct {
 
 	/* thread group exit support */
 	int			group_exit_code;
-	/* overloaded:
-	 * - notify group_exit_task when ->count is equal to notify_count
-	 * - everyone except group_exit_task is stopped during signal delivery
-	 *   of fatal signals, group_exit_task processes the signal.
-	 */
+	/* notify group_exec_task when notify_count is less or equal to 0 */
 	int			notify_count;
-	struct task_struct	*group_exit_task;
+	struct task_struct	*group_exec_task;
 
 	/* thread group stop support, overloads group_exit_code too */
 	int			group_stop_count;
@@ -275,11 +271,11 @@ static inline void signal_set_stop_flags(struct signal_struct *sig,
 	sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags;
 }
 
-/* If true, all threads except ->group_exit_task have pending SIGKILL */
+/* If true, all threads except ->group_exec_task have pending SIGKILL */
 static inline int signal_group_exit(const struct signal_struct *sig)
 {
 	return	(sig->flags & SIGNAL_GROUP_EXIT) ||
-		(sig->group_exit_task != NULL);
+		(sig->group_exec_task != NULL);
 }
 
 extern void flush_signals(struct task_struct *);
diff --git a/kernel/exit.c b/kernel/exit.c
index fc0726cb22db..b05578abbf26 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -116,7 +116,7 @@ static void __exit_signal(struct task_struct *tsk)
 		 * then notify it:
 		 */
 		if (sig->notify_count > 0 && !--sig->notify_count)
-			wake_up_process(sig->group_exit_task);
+			wake_up_process(sig->group_exec_task);
 
 		if (tsk == sig->curr_target)
 			sig->curr_target = next_thread(tsk);
@@ -697,7 +697,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 
 	/* mt-exec, de_thread() is waiting for group leader */
 	if (unlikely(tsk->signal->notify_count < 0))
-		wake_up_process(tsk->signal->group_exit_task);
+		wake_up_process(tsk->signal->group_exec_task);
 	write_unlock_irq(&tasklist_lock);
 
 	list_for_each_entry_safe(p, n, &dead, ptrace_entry) {
-- 
cgit v1.3


From 49697335e0b441b0553598c1b48ee9ebb053d2f1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 24 Jun 2021 02:14:30 -0500
Subject: signal: Remove the helper signal_group_exit

This helper is misleading.  It tests for an ongoing exec as well as
the process having received a fatal signal.

Sometimes it is appropriate to treat an on-going exec differently than
a process that is shutting down due to a fatal signal.  In particular
taking the fast path out of exit_signals instead of retargeting
signals is not appropriate during exec, and not changing the the exit
code in do_group_exit during exec.

Removing the helper makes it more obvious what is going on as both
cases must be coded for explicitly.

While removing the helper fix the two cases where I have observed
using signal_group_exit resulted in the wrong result.

In exit_signals only test for SIGNAL_GROUP_EXIT so that signals are
retargetted during an exec.

In do_group_exit use 0 as the exit code during an exec as de_thread
does not set group_exit_code.  As best as I can determine
group_exit_code has been is set to 0 most of the time during
de_thread.  During a thread group stop group_exit_code is set to the
stop signal and when the thread group receives SIGCONT group_exit_code
is reset to 0.

Link: https://lkml.kernel.org/r/20211213225350.27481-8-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/coredump.c                | 5 +++--
 fs/exec.c                    | 2 +-
 include/linux/sched/signal.h | 7 -------
 kernel/exit.c                | 8 ++++++--
 kernel/signal.c              | 8 +++++---
 5 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'kernel/exit.c')

diff --git a/fs/coredump.c b/fs/coredump.c
index c92ffc0bf2c2..7dece20b162b 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -372,11 +372,12 @@ static int zap_process(struct task_struct *start, int exit_code)
 static int zap_threads(struct task_struct *tsk,
 			struct core_state *core_state, int exit_code)
 {
+	struct signal_struct *signal = tsk->signal;
 	int nr = -EAGAIN;
 
 	spin_lock_irq(&tsk->sighand->siglock);
-	if (!signal_group_exit(tsk->signal)) {
-		tsk->signal->core_state = core_state;
+	if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) {
+		signal->core_state = core_state;
 		nr = zap_process(tsk, exit_code);
 		clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
 		tsk->flags |= PF_DUMPCORE;
diff --git a/fs/exec.c b/fs/exec.c
index 9d2925811011..82db656ca709 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1045,7 +1045,7 @@ static int de_thread(struct task_struct *tsk)
 	 * Kill all other threads in the thread group.
 	 */
 	spin_lock_irq(lock);
-	if (signal_group_exit(sig)) {
+	if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) {
 		/*
 		 * Another group action in progress, just
 		 * return so that the signal is processed.
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index d3248aba5183..b6ecb9fc4cd2 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -271,13 +271,6 @@ static inline void signal_set_stop_flags(struct signal_struct *sig,
 	sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags;
 }
 
-/* If true, all threads except ->group_exec_task have pending SIGKILL */
-static inline int signal_group_exit(const struct signal_struct *sig)
-{
-	return	(sig->flags & SIGNAL_GROUP_EXIT) ||
-		(sig->group_exec_task != NULL);
-}
-
 extern void flush_signals(struct task_struct *);
 extern void ignore_signals(struct task_struct *);
 extern void flush_signal_handlers(struct task_struct *, int force_default);
diff --git a/kernel/exit.c b/kernel/exit.c
index b05578abbf26..861cfb1e2f77 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -914,15 +914,19 @@ do_group_exit(int exit_code)
 
 	BUG_ON(exit_code & 0x80); /* core dumps don't get here */
 
-	if (signal_group_exit(sig))
+	if (sig->flags & SIGNAL_GROUP_EXIT)
 		exit_code = sig->group_exit_code;
+	else if (sig->group_exec_task)
+		exit_code = 0;
 	else if (!thread_group_empty(current)) {
 		struct sighand_struct *const sighand = current->sighand;
 
 		spin_lock_irq(&sighand->siglock);
-		if (signal_group_exit(sig))
+		if (sig->flags & SIGNAL_GROUP_EXIT)
 			/* Another thread got here before we took the lock.  */
 			exit_code = sig->group_exit_code;
+		else if (sig->group_exec_task)
+			exit_code = 0;
 		else {
 			sig->group_exit_code = exit_code;
 			sig->flags = SIGNAL_GROUP_EXIT;
diff --git a/kernel/signal.c b/kernel/signal.c
index bae231bc2f4a..167b8e196a79 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2386,7 +2386,8 @@ static bool do_signal_stop(int signr)
 		WARN_ON_ONCE(signr & ~JOBCTL_STOP_SIGMASK);
 
 		if (!likely(current->jobctl & JOBCTL_STOP_DEQUEUED) ||
-		    unlikely(signal_group_exit(sig)))
+		    unlikely(sig->flags & SIGNAL_GROUP_EXIT) ||
+		    unlikely(sig->group_exec_task))
 			return false;
 		/*
 		 * There is no group stop already in progress.  We must
@@ -2693,7 +2694,8 @@ relock:
 		enum pid_type type;
 
 		/* Has this task already been marked for death? */
-		if (signal_group_exit(signal)) {
+		if ((signal->flags & SIGNAL_GROUP_EXIT) ||
+		     signal->group_exec_task) {
 			ksig->info.si_signo = signr = SIGKILL;
 			sigdelset(&current->pending.signal, SIGKILL);
 			trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
@@ -2949,7 +2951,7 @@ void exit_signals(struct task_struct *tsk)
 	 */
 	cgroup_threadgroup_change_begin(tsk);
 
-	if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
+	if (thread_group_empty(tsk) || (tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
 		tsk->flags |= PF_EXITING;
 		cgroup_threadgroup_change_end(tsk);
 		return;
-- 
cgit v1.3


From 2d4bcf886e42f0f4846a3d9bdc3a90d278903a2e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 8 Jan 2022 11:23:02 -0600
Subject: exit: Remove profile_task_exit & profile_munmap

When I say remove I mean remove.  All profile_task_exit and
profile_munmap do is call a blocking notifier chain.  The helpers
profile_task_register and profile_task_unregister are not called
anywhere in the tree.  Which means this is all dead code.

So remove the dead code and make it easier to read do_exit.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lkml.kernel.org/r/20220103213312.9144-1-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/profile.h | 26 -------------------------
 kernel/exit.c           |  1 -
 kernel/profile.c        | 50 -------------------------------------------------
 mm/mmap.c               |  1 -
 4 files changed, 78 deletions(-)

(limited to 'kernel/exit.c')

diff --git a/include/linux/profile.h b/include/linux/profile.h
index fd18ca96f557..f7eb2b57d890 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -31,11 +31,6 @@ static inline int create_proc_profile(void)
 }
 #endif
 
-enum profile_type {
-	PROFILE_TASK_EXIT,
-	PROFILE_MUNMAP
-};
-
 #ifdef CONFIG_PROFILING
 
 extern int prof_on __read_mostly;
@@ -66,23 +61,14 @@ static inline void profile_hit(int type, void *ip)
 struct task_struct;
 struct mm_struct;
 
-/* task is in do_exit() */
-void profile_task_exit(struct task_struct * task);
-
 /* task is dead, free task struct ? Returns 1 if
  * the task was taken, 0 if the task should be freed.
  */
 int profile_handoff_task(struct task_struct * task);
 
-/* sys_munmap */
-void profile_munmap(unsigned long addr);
-
 int task_handoff_register(struct notifier_block * n);
 int task_handoff_unregister(struct notifier_block * n);
 
-int profile_event_register(enum profile_type, struct notifier_block * n);
-int profile_event_unregister(enum profile_type, struct notifier_block * n);
-
 #else
 
 #define prof_on 0
@@ -117,19 +103,7 @@ static inline int task_handoff_unregister(struct notifier_block * n)
 	return -ENOSYS;
 }
 
-static inline int profile_event_register(enum profile_type t, struct notifier_block * n)
-{
-	return -ENOSYS;
-}
-
-static inline int profile_event_unregister(enum profile_type t, struct notifier_block * n)
-{
-	return -ENOSYS;
-}
-
-#define profile_task_exit(a) do { } while (0)
 #define profile_handoff_task(a) (0)
-#define profile_munmap(a) do { } while (0)
 
 #endif /* CONFIG_PROFILING */
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 861cfb1e2f77..64e907bc87d5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -751,7 +751,6 @@ void __noreturn do_exit(long code)
 	 */
 	force_uaccess_begin();
 
-	profile_task_exit(tsk);
 	kcov_task_exit(tsk);
 
 	coredump_task_exit(tsk);
diff --git a/kernel/profile.c b/kernel/profile.c
index eb9c7f0f5ac5..9355cc934a96 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -135,14 +135,7 @@ int __ref profile_init(void)
 
 /* Profile event notifications */
 
-static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
 static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
-static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
-
-void profile_task_exit(struct task_struct *task)
-{
-	blocking_notifier_call_chain(&task_exit_notifier, 0, task);
-}
 
 int profile_handoff_task(struct task_struct *task)
 {
@@ -151,11 +144,6 @@ int profile_handoff_task(struct task_struct *task)
 	return (ret == NOTIFY_OK) ? 1 : 0;
 }
 
-void profile_munmap(unsigned long addr)
-{
-	blocking_notifier_call_chain(&munmap_notifier, 0, (void *)addr);
-}
-
 int task_handoff_register(struct notifier_block *n)
 {
 	return atomic_notifier_chain_register(&task_free_notifier, n);
@@ -168,44 +156,6 @@ int task_handoff_unregister(struct notifier_block *n)
 }
 EXPORT_SYMBOL_GPL(task_handoff_unregister);
 
-int profile_event_register(enum profile_type type, struct notifier_block *n)
-{
-	int err = -EINVAL;
-
-	switch (type) {
-	case PROFILE_TASK_EXIT:
-		err = blocking_notifier_chain_register(
-				&task_exit_notifier, n);
-		break;
-	case PROFILE_MUNMAP:
-		err = blocking_notifier_chain_register(
-				&munmap_notifier, n);
-		break;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(profile_event_register);
-
-int profile_event_unregister(enum profile_type type, struct notifier_block *n)
-{
-	int err = -EINVAL;
-
-	switch (type) {
-	case PROFILE_TASK_EXIT:
-		err = blocking_notifier_chain_unregister(
-				&task_exit_notifier, n);
-		break;
-	case PROFILE_MUNMAP:
-		err = blocking_notifier_chain_unregister(
-				&munmap_notifier, n);
-		break;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(profile_event_unregister);
-
 #if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
 /*
  * Each cpu has a pair of open-addressed hashtables for pending
diff --git a/mm/mmap.c b/mm/mmap.c
index bfb0ea164a90..70318c2a47c3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2928,7 +2928,6 @@ EXPORT_SYMBOL(vm_munmap);
 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
 {
 	addr = untagged_addr(addr);
-	profile_munmap(addr);
 	return __vm_munmap(addr, len, true);
 }
 
-- 
cgit v1.3


From 270b6541e603a7fae0cad7af3dc3bca6adb343f3 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 23 Dec 2021 10:05:19 -0600
Subject: exit: Coredumps reach do_group_exit

The comment about coredumps not reaching do_group_exit and the
corresponding BUG_ON are bogus.

What happens and has happened for years is that get_signal calls
do_coredump (which sets SIGNAL_GROUP_EXIT and group_exit_code) and
then do_group_exit passing the signal number.  Then do_group_exit
ignores the exit_code it is passed and uses signal->group_exit_code
from the coredump.

The comment and BUG_ON were correct when they were added during the
2.5 development cycle, but became obsolete and incorrect when
get_signal was changed to fall through to do_group_exit after
do_coredump in 2.6.10-rc2.

So remove the stale comment and BUG_ON

Fixes: 63bd6144f191 ("[PATCH] Invalid BUG_ONs in signal.c")
History-Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git
Link: https://lkml.kernel.org/r/20220103213312.9144-2-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/exit.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'kernel/exit.c')

diff --git a/kernel/exit.c b/kernel/exit.c
index 64e907bc87d5..db86307077d4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -911,8 +911,6 @@ do_group_exit(int exit_code)
 {
 	struct signal_struct *sig = current->signal;
 
-	BUG_ON(exit_code & 0x80); /* core dumps don't get here */
-
 	if (sig->flags & SIGNAL_GROUP_EXIT)
 		exit_code = sig->group_exit_code;
 	else if (sig->group_exec_task)
-- 
cgit v1.3


From 907c311f37ba04ccebd00a9b9f3ba718e318a1de Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 21 Dec 2021 10:11:01 -0600
Subject: exit: Fix the exit_code for wait_task_zombie

The function wait_task_zombie is defined to always returns the process not
thread exit status.  Unfortunately when process group exit support
was added to wait_task_zombie the WNOWAIT case was overlooked.

Usually tsk->exit_code and tsk->signal->group_exit_code will be in sync
so fixing this is bug probably has no effect in practice.  But fix
it anyway so that people aren't scratching their heads about why
the two code paths are different.

History-Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git
Fixes: 2c66151cbc2c ("[PATCH] sys_exit() threading improvements, BK-curr")
Link: https://lkml.kernel.org/r/20220103213312.9144-3-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/exit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/exit.c')

diff --git a/kernel/exit.c b/kernel/exit.c
index db86307077d4..b00a25bb4ab9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1018,7 +1018,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		return 0;
 
 	if (unlikely(wo->wo_flags & WNOWAIT)) {
-		status = p->exit_code;
+		status = (p->signal->flags & SIGNAL_GROUP_EXIT)
+			? p->signal->group_exit_code : p->exit_code;
 		get_task_struct(p);
 		read_unlock(&tasklist_lock);
 		sched_annotate_sleep();
-- 
cgit v1.3