12 files changed, 543 insertions, 821 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 24939b8553e6..cab4c7b27e54 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -259,7 +259,7 @@ config DEBUG_INFO_NONE
 config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT
 	bool "Rely on the toolchain's implicit default DWARF version"
 	select DEBUG_INFO
-	depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128)
+	depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128)
 	help
 	  The implicit default version of DWARF debug info produced by a
 	  toolchain changes over time.
@@ -2621,6 +2621,19 @@ config FIND_BIT_BENCHMARK
 
 	  If unsure, say N.
 
+config FIND_BIT_BENCHMARK_RUST
+	tristate "Test find_bit functions in Rust"
+	depends on RUST
+	help
+	  This builds the "find_bit_benchmark_rust" module. It is a micro
+	  benchmark that measures the performance of Rust functions that
+	  correspond to the find_*_bit() operations in C. It follows the
+	  FIND_BIT_BENCHMARK closely but will in general not yield the same
+	  numbers due to extra bounds checks and the overhead of foreign
+	  function calls.
+
+	  If unsure, say N.
+
 config TEST_FIRMWARE
 	tristate "Test firmware loading via userspace interface"
 	depends on FW_LOADER
diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan
index 609ddfc73de5..4ce4b0c0109c 100644
--- a/lib/Kconfig.kcsan
+++ b/lib/Kconfig.kcsan
@@ -185,12 +185,6 @@ config KCSAN_WEAK_MEMORY
 	bool "Enable weak memory modeling to detect missing memory barriers"
 	default y
 	depends on KCSAN_STRICT
-	# We can either let objtool nop __tsan_func_{entry,exit}() and builtin
-	# atomics instrumentation in .noinstr.text, or use a compiler that can
-	# implement __no_kcsan to really remove all instrumentation.
-	depends on !ARCH_WANTS_NO_INSTR || HAVE_NOINSTR_HACK || \
-		   CC_IS_GCC || CLANG_VERSION >= 140000
-	select OBJTOOL if HAVE_NOINSTR_HACK
 	help
 	  Enable support for modeling a subset of weak memory, which allows
 	  detecting a subset of data races due to missing memory barriers.
diff --git a/lib/Kconfig.kmsan b/lib/Kconfig.kmsan
index 0541d7b079cc..7251b6b59e69 100644
--- a/lib/Kconfig.kmsan
+++ b/lib/Kconfig.kmsan
@@ -3,10 +3,7 @@ config HAVE_ARCH_KMSAN
 	bool
 
 config HAVE_KMSAN_COMPILER
-	# Clang versions <14.0.0 also support -fsanitize=kernel-memory, but not
-	# all the features necessary to build the kernel with KMSAN.
-	depends on CC_IS_CLANG && CLANG_VERSION >= 140000
-	def_bool $(cc-option,-fsanitize=kernel-memory -mllvm -msan-disable-checks=1)
+	def_bool CC_IS_CLANG
 
 config KMSAN
 	bool "KMSAN: detector of uninitialized values use"
@@ -28,15 +25,9 @@ config KMSAN
 
 if KMSAN
 
-config HAVE_KMSAN_PARAM_RETVAL
-	# -fsanitize-memory-param-retval is supported only by Clang >= 14.
-	depends on HAVE_KMSAN_COMPILER
-	def_bool $(cc-option,-fsanitize=kernel-memory -fsanitize-memory-param-retval)
-
 config KMSAN_CHECK_PARAM_RETVAL
 	bool "Check for uninitialized values passed to and returned from functions"
 	default y
-	depends on HAVE_KMSAN_PARAM_RETVAL
 	help
 	  If the compiler supports -fsanitize-memory-param-retval, KMSAN will
 	  eagerly check every function parameter passed by value and every
diff --git a/lib/Makefile b/lib/Makefile
index 392ff808c9b9..1ab2c4be3b66 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -62,6 +62,7 @@ obj-y += hexdump.o
 obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
 obj-y += kstrtox.o
 obj-$(CONFIG_FIND_BIT_BENCHMARK) += find_bit_benchmark.o
+obj-$(CONFIG_FIND_BIT_BENCHMARK_RUST) += find_bit_benchmark_rust.o
 obj-$(CONFIG_TEST_BPF) += test_bpf.o
 test_dhry-objs := dhry_1.o dhry_2.o dhry_run.o
 obj-$(CONFIG_TEST_DHRY) += test_dhry.o
@@ -109,11 +110,7 @@ test_fpu-y := test_fpu_glue.o test_fpu_impl.o
 CFLAGS_test_fpu_impl.o += $(CC_FLAGS_FPU)
 CFLAGS_REMOVE_test_fpu_impl.o += $(CC_FLAGS_NO_FPU)
 
-# Some KUnit files (hooks.o) need to be built-in even when KUnit is a module,
-# so we can't just use obj-$(CONFIG_KUNIT).
-ifdef CONFIG_KUNIT
 obj-y += kunit/
-endif
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/find_bit_benchmark_rust.rs b/lib/find_bit_benchmark_rust.rs
new file mode 100644
index 000000000000..6bdc51de2f30
--- /dev/null
+++ b/lib/find_bit_benchmark_rust.rs
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+//! Benchmark for find_bit-like methods in Bitmap Rust API.
+
+use kernel::alloc::flags::GFP_KERNEL;
+use kernel::bindings;
+use kernel::bitmap::BitmapVec;
+use kernel::error::{code, Result};
+use kernel::prelude::module;
+use kernel::time::{Instant, Monotonic};
+use kernel::ThisModule;
+use kernel::{pr_cont, pr_err};
+
+const BITMAP_LEN: usize = 4096 * 8 * 10;
+// Reciprocal of the fraction of bits that are set in sparse bitmap.
+const SPARSENESS: usize = 500;
+
+/// Test module that benchmarks performance of traversing bitmaps.
+struct Benchmark();
+
+fn test_next_bit(bitmap: &BitmapVec) {
+    let time = Instant::<Monotonic>::now();
+    let mut cnt = 0;
+    let mut i = 0;
+
+    while let Some(index) = bitmap.next_bit(i) {
+        cnt += 1;
+        i = index + 1;
+        // CONFIG_RUST_BITMAP_HARDENED enforces strict bounds.
+        if i == BITMAP_LEN {
+            break;
+        }
+    }
+
+    let delta = time.elapsed();
+    pr_cont!(
+        "\nnext_bit:           {:18} ns, {:6} iterations",
+        delta.as_nanos(),
+        cnt
+    );
+}
+
+fn test_next_zero_bit(bitmap: &BitmapVec) {
+    let time = Instant::<Monotonic>::now();
+    let mut cnt = 0;
+    let mut i = 0;
+
+    while let Some(index) = bitmap.next_zero_bit(i) {
+        cnt += 1;
+        i = index + 1;
+        // CONFIG_RUST_BITMAP_HARDENED enforces strict bounds.
+        if i == BITMAP_LEN {
+            break;
+        }
+    }
+
+    let delta = time.elapsed();
+    pr_cont!(
+        "\nnext_zero_bit:      {:18} ns, {:6} iterations",
+        delta.as_nanos(),
+        cnt
+    );
+}
+
+fn find_bit_test() {
+    pr_err!("Benchmark");
+    pr_cont!("\nStart testing find_bit() Rust with random-filled bitmap");
+
+    let mut bitmap = BitmapVec::new(BITMAP_LEN, GFP_KERNEL).expect("alloc bitmap failed");
+    bitmap.fill_random();
+
+    test_next_bit(&bitmap);
+    test_next_zero_bit(&bitmap);
+
+    pr_cont!("\nStart testing find_bit() Rust with sparse bitmap");
+
+    let mut bitmap = BitmapVec::new(BITMAP_LEN, GFP_KERNEL).expect("alloc sparse bitmap failed");
+    let nbits = BITMAP_LEN / SPARSENESS;
+    for _i in 0..nbits {
+        // SAFETY: __get_random_u32_below is safe to call with any u32 argument.
+        let bit =
+            unsafe { bindings::__get_random_u32_below(BITMAP_LEN.try_into().unwrap()) as usize };
+        bitmap.set_bit(bit);
+    }
+
+    test_next_bit(&bitmap);
+    test_next_zero_bit(&bitmap);
+    pr_cont!("\n");
+}
+
+impl kernel::Module for Benchmark {
+    fn init(_module: &'static ThisModule) -> Result<Self> {
+        find_bit_test();
+        // Return error so test module can be inserted again without rmmod.
+        Err(code::EINVAL)
+    }
+}
+
+module! {
+    type: Benchmark,
+    name: "find_bit_benchmark_rust",
+    authors: ["Burak Emir <bqe@google.com>"],
+    description: "Module with benchmark for bitmap Rust API",
+    license: "GPL v2",
+}
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f9193f952f49..2fe66a6b8789 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -784,101 +784,6 @@ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
 }
 EXPORT_SYMBOL(iov_iter_discard);
 
-static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
-				   unsigned len_mask)
-{
-	const struct iovec *iov = iter_iov(i);
-	size_t size = i->count;
-	size_t skip = i->iov_offset;
-
-	do {
-		size_t len = iov->iov_len - skip;
-
-		if (len > size)
-			len = size;
-		if (len & len_mask)
-			return false;
-		if ((unsigned long)(iov->iov_base + skip) & addr_mask)
-			return false;
-
-		iov++;
-		size -= len;
-		skip = 0;
-	} while (size);
-
-	return true;
-}
-
-static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask,
-				  unsigned len_mask)
-{
-	const struct bio_vec *bvec = i->bvec;
-	unsigned skip = i->iov_offset;
-	size_t size = i->count;
-
-	do {
-		size_t len = bvec->bv_len - skip;
-
-		if (len > size)
-			len = size;
-		if (len & len_mask)
-			return false;
-		if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
-			return false;
-
-		bvec++;
-		size -= len;
-		skip = 0;
-	} while (size);
-
-	return true;
-}
-
-/**
- * iov_iter_is_aligned() - Check if the addresses and lengths of each segments
- * 	are aligned to the parameters.
- *
- * @i: &struct iov_iter to restore
- * @addr_mask: bit mask to check against the iov element's addresses
- * @len_mask: bit mask to check against the iov element's lengths
- *
- * Return: false if any addresses or lengths intersect with the provided masks
- */
-bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
-			 unsigned len_mask)
-{
-	if (likely(iter_is_ubuf(i))) {
-		if (i->count & len_mask)
-			return false;
-		if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask)
-			return false;
-		return true;
-	}
-
-	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
-		return iov_iter_aligned_iovec(i, addr_mask, len_mask);
-
-	if (iov_iter_is_bvec(i))
-		return iov_iter_aligned_bvec(i, addr_mask, len_mask);
-
-	/* With both xarray and folioq types, we're dealing with whole folios. */
-	if (iov_iter_is_xarray(i)) {
-		if (i->count & len_mask)
-			return false;
-		if ((i->xarray_start + i->iov_offset) & addr_mask)
-			return false;
-	}
-	if (iov_iter_is_folioq(i)) {
-		if (i->count & len_mask)
-			return false;
-		if (i->iov_offset & addr_mask)
-			return false;
-	}
-
-	return true;
-}
-EXPORT_SYMBOL_GPL(iov_iter_is_aligned);
-
 static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
 {
 	const struct iovec *iov = iter_iov(i);
diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig
index c10ede4b1d22..7a6af361d2fc 100644
--- a/lib/kunit/Kconfig
+++ b/lib/kunit/Kconfig
@@ -106,4 +106,15 @@ config KUNIT_DEFAULT_TIMEOUT
 	  If unsure, the default timeout of 300 seconds is suitable for most
 	  cases.
 
+config KUNIT_UML_PCI
+	bool "KUnit UML PCI Support"
+	depends on UML
+	select UML_PCI
+	help
+	  Enables the PCI subsystem on UML for use by KUnit tests.
+	  Some KUnit tests require the PCI core which is not enabled by
+	  default on UML.
+
+	  If unsure, say N.
+
 endif # KUNIT
diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile
index 5aa51978e456..656f1fa35abc 100644
--- a/lib/kunit/Makefile
+++ b/lib/kunit/Makefile
@@ -17,7 +17,7 @@ kunit-objs +=				debugfs.o
 endif
 
 # KUnit 'hooks' are built-in even when KUnit is built as a module.
-obj-y +=				hooks.o
+obj-$(if $(CONFIG_KUNIT),y) +=		hooks.o
 
 obj-$(CONFIG_KUNIT_TEST) +=		kunit-test.o
 obj-$(CONFIG_KUNIT_TEST) +=		platform-test.o
diff --git a/lib/kunit/kunit-example-test.c b/lib/kunit/kunit-example-test.c
index 3056d6bc705d..9452b163956f 100644
--- a/lib/kunit/kunit-example-test.c
+++ b/lib/kunit/kunit-example-test.c
@@ -278,6 +278,218 @@ static void example_slow_test(struct kunit *test)
 }
 
 /*
+ * This custom function allocates memory and sets the information we want
+ * stored in the kunit_resource->data field.
+ */
+static int example_resource_init(struct kunit_resource *res, void *context)
+{
+	int *info = kmalloc(sizeof(*info), GFP_KERNEL);
+
+	if (!info)
+		return -ENOMEM;
+	*info = *(int *)context;
+	res->data = info;
+	return 0;
+}
+
+/*
+ * This function deallocates memory for the kunit_resource->data field.
+ */
+static void example_resource_free(struct kunit_resource *res)
+{
+	kfree(res->data);
+}
+
+/*
+ * This match function is invoked by kunit_find_resource() to locate
+ * a test resource based on certain criteria.
+ */
+static bool example_resource_alloc_match(struct kunit *test,
+					 struct kunit_resource *res,
+					 void *match_data)
+{
+	return res->data && res->free == example_resource_free;
+}
+
+/*
+ * This is an example of a function that provides a description for each of the
+ * parameters in a parameterized test.
+ */
+static void example_param_array_get_desc(struct kunit *test, const void *p, char *desc)
+{
+	const struct example_param *param = p;
+
+	snprintf(desc, KUNIT_PARAM_DESC_SIZE,
+		 "example check if %d is less than or equal to 3", param->value);
+}
+
+/*
+ * This function gets passed in the parameterized test context i.e. the
+ * struct kunit belonging to the parameterized test. You can use this function
+ * to add resources you want shared across the whole parameterized test or
+ * for additional setup.
+ */
+static int example_param_init(struct kunit *test)
+{
+	int ctx = 3; /* Data to be stored. */
+	size_t arr_size = ARRAY_SIZE(example_params_array);
+
+	/*
+	 * This allocates a struct kunit_resource, points its data field at a
+	 * heap copy of ctx, and adds it to the struct kunit's resources list.
+	 * The resource is managed by the parameterized test, so no custom
+	 * exit function is needed to deallocate it: it gets cleaned up at
+	 * the end of the parameterized test.
+	 */
+	void *data = kunit_alloc_resource(test, example_resource_init, example_resource_free,
+					  GFP_KERNEL, &ctx);
+
+	if (!data)
+		return -ENOMEM;
+	/*
+	 * Pass the parameter array information to the parameterized test context
+	 * struct kunit. Note that you will need to provide kunit_array_gen_params()
+	 * as the generator function to KUNIT_CASE_PARAM_WITH_INIT() when registering
+	 * a parameter array via this route.
+	 */
+	kunit_register_params_array(test, example_params_array, arr_size,
+				    example_param_array_get_desc);
+	return 0;
+}
+
+/*
+ * This is an example of a test that uses shared resources available in the
+ * parameterized test context.
+ */
+static void example_params_test_with_init(struct kunit *test)
+{
+	int threshold;
+	struct kunit_resource *res;
+	const struct example_param *param = test->param_value;
+
+	/* By design, param pointer will not be NULL. */
+	KUNIT_ASSERT_NOT_NULL(test, param);
+
+	/*
+	 * Here we pass test->parent to search for shared resources in the
+	 * parameterized test context.
+	 */
+	res = kunit_find_resource(test->parent, example_resource_alloc_match, NULL);
+
+	KUNIT_ASSERT_NOT_NULL(test, res);
+
+	/* Since kunit_resource->data is a void pointer we need to typecast it. */
+	threshold = *((int *)res->data);
+
+	/* Drop the kunit_find_resource() reference before an assertion can abort. */
+	kunit_put_resource(res);
+
+	/* Assert that the parameter is less than or equal to a certain threshold. */
+	KUNIT_ASSERT_LE(test, param->value, threshold);
+}
+
+/*
+ * Helper function to create a parameter array of Fibonacci numbers. This example
+ * highlights a parameter generation scenario that is:
+ * 1. Not feasible to fully pre-generate at compile time.
+ * 2. Challenging to implement with a standard generate_params() function,
+ * as it only provides the previous parameter, while Fibonacci requires
+ * access to two preceding values for calculation.
+ */
+static void *make_fibonacci_params(struct kunit *test, size_t seq_size)
+{
+	int *seq;
+
+	if (!seq_size)
+		return NULL;
+	/*
+	 * Using kunit_kmalloc_array here ties the lifetime of the array to
+	 * the parameterized test i.e. it will get automatically cleaned up
+	 * by KUnit after the parameterized test finishes.
+	 */
+	seq = kunit_kmalloc_array(test, seq_size, sizeof(*seq), GFP_KERNEL);
+
+	if (!seq)
+		return NULL;
+	/* seq_size is non-zero here, so seq[0] always exists. */
+	seq[0] = 0;
+	if (seq_size >= 2)
+		seq[1] = 1;
+	for (size_t i = 2; i < seq_size; i++)
+		seq[i] = seq[i - 1] + seq[i - 2];
+	return seq;
+}
+
+/*
+ * This is an example of a function that provides a description for each of the
+ * parameters.
+ */
+static void example_param_dynamic_arr_get_desc(struct kunit *test, const void *p, char *desc)
+{
+	const int *fib_num = p;
+
+	snprintf(desc, KUNIT_PARAM_DESC_SIZE, "fibonacci param: %d", *fib_num);
+}
+
+/*
+ * Example of a parameterized test param_init() function that registers a dynamic
+ * array of parameters.
+ */
+static int example_param_init_dynamic_arr(struct kunit *test)
+{
+	size_t seq_size;
+	int *fibonacci_params;
+
+	kunit_info(test, "initializing parameterized test\n");
+
+	seq_size = 6;
+	fibonacci_params = make_fibonacci_params(test, seq_size);
+
+	if (!fibonacci_params)
+		return -ENOMEM;
+
+	/*
+	 * Passes the dynamic parameter array information to the parameterized test
+	 * context struct kunit. The array and its metadata will be stored in
+	 * test->parent->params_array. The array itself will be located in
+	 * params_data.params.
+	 *
+	 * Note that you will need to pass kunit_array_gen_params() as the
+	 * generator function to KUNIT_CASE_PARAM_WITH_INIT() when registering
+	 * a parameter array this route.
+	 */
+	kunit_register_params_array(test, fibonacci_params, seq_size,
+				    example_param_dynamic_arr_get_desc);
+	return 0;
+}
+
+/*
+ * Example of a parameterized test param_exit() function that outputs a log
+ * at the end of the parameterized test. It could also be used for any other
+ * teardown logic.
+ */
+static void example_param_exit_dynamic_arr(struct kunit *test)
+{
+	kunit_info(test, "exiting parameterized test\n");
+}
+
+/*
+ * Example of test that uses the registered dynamic array to perform assertions
+ * and expectations.
+ */
+static void example_params_test_with_init_dynamic_arr(struct kunit *test)
+{
+	const int *param = test->param_value;
+	int param_val;
+
+	/* By design, param pointer will not be NULL. */
+	KUNIT_ASSERT_NOT_NULL(test, param);
+
+	param_val = *param;
+	KUNIT_EXPECT_EQ(test, param_val - param_val, 0);
+}
+
+/*
  * Here we make a list of all the test cases we want to add to the test suite
  * below.
  */
@@ -296,6 +508,11 @@ static struct kunit_case example_test_cases[] = {
 	KUNIT_CASE(example_static_stub_using_fn_ptr_test),
 	KUNIT_CASE(example_priv_test),
 	KUNIT_CASE_PARAM(example_params_test, example_gen_params),
+	KUNIT_CASE_PARAM_WITH_INIT(example_params_test_with_init, kunit_array_gen_params,
+				   example_param_init, NULL),
+	KUNIT_CASE_PARAM_WITH_INIT(example_params_test_with_init_dynamic_arr,
+				   kunit_array_gen_params, example_param_init_dynamic_arr,
+				   example_param_exit_dynamic_arr),
 	KUNIT_CASE_SLOW(example_slow_test),
 	{}
 };
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index d2bfa331a2b1..bb66ea1a3eac 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -337,6 +337,14 @@ void __kunit_do_failed_assertion(struct kunit *test,
 }
 EXPORT_SYMBOL_GPL(__kunit_do_failed_assertion);
 
+static void kunit_init_params(struct kunit *test)
+{
+	test->params_array.params = NULL;
+	test->params_array.get_description = NULL;
+	test->params_array.num_params = 0;
+	test->params_array.elem_size = 0;
+}
+
 void kunit_init_test(struct kunit *test, const char *name, struct string_stream *log)
 {
 	spin_lock_init(&test->lock);
@@ -347,6 +355,7 @@ void kunit_init_test(struct kunit *test, const char *name, struct string_stream
 		string_stream_clear(log);
 	test->status = KUNIT_SUCCESS;
 	test->status_comment[0] = '\0';
+	kunit_init_params(test);
 }
 EXPORT_SYMBOL_GPL(kunit_init_test);
 
@@ -641,12 +650,44 @@ static void kunit_accumulate_stats(struct kunit_result_stats *total,
 	total->total += add.total;
 }
 
+const void *kunit_array_gen_params(struct kunit *test, const void *prev, char *desc)
+{
+	const struct kunit_params *arr = &test->params_array;
+	const char *base = arr->params;
+	const void *param;
+
+	/* Every registered parameter has been handed out: end the iteration. */
+	if (test->param_index >= arr->num_params)
+		return NULL;
+
+	param = base + test->param_index * arr->elem_size;
+	if (arr->get_description)
+		arr->get_description(test, param, desc);
+	return param;
+}
+EXPORT_SYMBOL_GPL(kunit_array_gen_params);
+
+static void kunit_init_parent_param_test(struct kunit_case *test_case, struct kunit *test)
+{
+	int err = test_case->param_init ? test_case->param_init(test) : 0;
+
+	if (!err)
+		return;
+
+	kunit_err(test_case, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT
+		  "# failed to initialize parent parameter test (%d)", err);
+	/* Record the failure on both the parent context and the test case. */
+	test->status = KUNIT_FAILURE;
+	test_case->status = KUNIT_FAILURE;
+}
+
 int kunit_run_tests(struct kunit_suite *suite)
 {
 	char param_desc[KUNIT_PARAM_DESC_SIZE];
 	struct kunit_case *test_case;
 	struct kunit_result_stats suite_stats = { 0 };
 	struct kunit_result_stats total_stats = { 0 };
+	const void *curr_param;
 
 	/* Taint the kernel so we know we've run tests. */
 	add_taint(TAINT_TEST, LOCKDEP_STILL_OK);
@@ -677,41 +718,64 @@ int kunit_run_tests(struct kunit_suite *suite)
 			kunit_run_case_catch_errors(suite, test_case, &test);
 			kunit_update_stats(&param_stats, test.status);
 		} else {
+			kunit_init_parent_param_test(test_case, &test);
+			if (test_case->status == KUNIT_FAILURE) {
+				kunit_update_stats(&param_stats, test.status);
+				goto test_case_end;
+			}
 			/* Get initial param. */
 			param_desc[0] = '\0';
-			test.param_value = test_case->generate_params(NULL, param_desc);
+			/* TODO: Make generate_params try-catch */
+			curr_param = test_case->generate_params(&test, NULL, param_desc);
 			test_case->status = KUNIT_SKIPPED;
 			kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT
 				  "KTAP version 1\n");
 			kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT
 				  "# Subtest: %s", test_case->name);
+			if (test.params_array.params &&
+			    test_case->generate_params == kunit_array_gen_params) {
+				kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT
+					  KUNIT_SUBTEST_INDENT "1..%zd\n",
+					  test.params_array.num_params);
+			}
 
-			while (test.param_value) {
-				kunit_run_case_catch_errors(suite, test_case, &test);
+			while (curr_param) {
+				struct kunit param_test = {
+					.param_value = curr_param,
+					.param_index = ++test.param_index,
+					.parent = &test,
+				};
+				kunit_init_test(&param_test, test_case->name, test_case->log);
+				kunit_run_case_catch_errors(suite, test_case, &param_test);
 
 				if (param_desc[0] == '\0') {
 					snprintf(param_desc, sizeof(param_desc),
-						 "param-%d", test.param_index);
+						 "param-%d", param_test.param_index);
 				}
 
-				kunit_print_ok_not_ok(&test, KUNIT_LEVEL_CASE_PARAM,
-						      test.status,
-						      test.param_index + 1,
+				kunit_print_ok_not_ok(&param_test, KUNIT_LEVEL_CASE_PARAM,
+						      param_test.status,
+						      param_test.param_index,
 						      param_desc,
-						      test.status_comment);
+						      param_test.status_comment);
 
-				kunit_update_stats(&param_stats, test.status);
+				kunit_update_stats(&param_stats, param_test.status);
 
 				/* Get next param. */
 				param_desc[0] = '\0';
-				test.param_value = test_case->generate_params(test.param_value, param_desc);
-				test.param_index++;
-				test.status = KUNIT_SUCCESS;
-				test.status_comment[0] = '\0';
-				test.priv = NULL;
+				curr_param = test_case->generate_params(&test, curr_param,
+									param_desc);
 			}
+			/*
+			 * TODO: Put into a try catch. Since we don't need suite->exit
+			 * for it we can't reuse kunit_try_run_cleanup for this yet.
+			 */
+			if (test_case->param_exit)
+				test_case->param_exit(&test);
+			/* TODO: Put this kunit_cleanup into a try-catch. */
+			kunit_cleanup(&test);
 		}
-
+test_case_end:
 		kunit_print_attr((void *)test_case, true, KUNIT_LEVEL_CASE);
 
 		kunit_print_test_stats(&test, param_stats);
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index b4ee2d29d7a9..ab4c6c21a625 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -83,13 +83,9 @@
 
 /*
  * Maple state flags
- * * MA_STATE_BULK		- Bulk insert mode
- * * MA_STATE_REBALANCE		- Indicate a rebalance during bulk insert
  * * MA_STATE_PREALLOC		- Preallocated nodes, WARN_ON allocation
  */
-#define MA_STATE_BULK		1
-#define MA_STATE_REBALANCE	2
-#define MA_STATE_PREALLOC	4
+#define MA_STATE_PREALLOC	1
 
 #define ma_parent_ptr(x) ((struct maple_pnode *)(x))
 #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
@@ -176,26 +172,25 @@ static inline struct maple_node *mt_alloc_one(gfp_t gfp)
 	return kmem_cache_alloc(maple_node_cache, gfp);
 }
 
-static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
+static inline void mt_free_bulk(size_t size, void __rcu **nodes)
 {
-	return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
+	kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
 }
 
-static inline void mt_free_one(struct maple_node *node)
+static void mt_return_sheaf(struct slab_sheaf *sheaf)
 {
-	kmem_cache_free(maple_node_cache, node);
+	kmem_cache_return_sheaf(maple_node_cache, GFP_NOWAIT, sheaf);
 }
 
-static inline void mt_free_bulk(size_t size, void __rcu **nodes)
+static struct slab_sheaf *mt_get_sheaf(gfp_t gfp, int count)
 {
-	kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
+	return kmem_cache_prefill_sheaf(maple_node_cache, gfp, count);
 }
 
-static void mt_free_rcu(struct rcu_head *head)
+static int mt_refill_sheaf(gfp_t gfp, struct slab_sheaf **sheaf,
+		unsigned int size)
 {
-	struct maple_node *node = container_of(head, struct maple_node, rcu);
-
-	kmem_cache_free(maple_node_cache, node);
+	return kmem_cache_refill_sheaf(maple_node_cache, gfp, sheaf, size);
 }
 
 /*
@@ -208,7 +203,7 @@ static void mt_free_rcu(struct rcu_head *head)
 static void ma_free_rcu(struct maple_node *node)
 {
 	WARN_ON(node->parent != ma_parent_ptr(node));
-	call_rcu(&node->rcu, mt_free_rcu);
+	kfree_rcu(node, rcu);
 }
 
 static void mt_set_height(struct maple_tree *mt, unsigned char height)
@@ -591,67 +586,6 @@ static __always_inline bool mte_dead_node(const struct maple_enode *enode)
 }
 
 /*
- * mas_allocated() - Get the number of nodes allocated in a maple state.
- * @mas: The maple state
- *
- * The ma_state alloc member is overloaded to hold a pointer to the first
- * allocated node or to the number of requested nodes to allocate.  If bit 0 is
- * set, then the alloc contains the number of requested nodes.  If there is an
- * allocated node, then the total allocated nodes is in that node.
- *
- * Return: The total number of nodes allocated
- */
-static inline unsigned long mas_allocated(const struct ma_state *mas)
-{
-	if (!mas->alloc || ((unsigned long)mas->alloc & 0x1))
-		return 0;
-
-	return mas->alloc->total;
-}
-
-/*
- * mas_set_alloc_req() - Set the requested number of allocations.
- * @mas: the maple state
- * @count: the number of allocations.
- *
- * The requested number of allocations is either in the first allocated node,
- * located in @mas->alloc->request_count, or directly in @mas->alloc if there is
- * no allocated node.  Set the request either in the node or do the necessary
- * encoding to store in @mas->alloc directly.
- */
-static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count)
-{
-	if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) {
-		if (!count)
-			mas->alloc = NULL;
-		else
-			mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U);
-		return;
-	}
-
-	mas->alloc->request_count = count;
-}
-
-/*
- * mas_alloc_req() - get the requested number of allocations.
- * @mas: The maple state
- *
- * The alloc count is either stored directly in @mas, or in
- * @mas->alloc->request_count if there is at least one node allocated.  Decode
- * the request count if it's stored directly in @mas->alloc.
- *
- * Return: The allocation request count.
- */
-static inline unsigned int mas_alloc_req(const struct ma_state *mas)
-{
-	if ((unsigned long)mas->alloc & 0x1)
-		return (unsigned long)(mas->alloc) >> 1;
-	else if (mas->alloc)
-		return mas->alloc->request_count;
-	return 0;
-}
-
-/*
  * ma_pivots() - Get a pointer to the maple node pivots.
  * @node: the maple node
  * @type: the node type
@@ -1032,24 +966,6 @@ static inline void mas_descend(struct ma_state *mas)
 }
 
 /*
- * mte_set_gap() - Set a maple node gap.
- * @mn: The encoded maple node
- * @gap: The offset of the gap to set
- * @val: The gap value
- */
-static inline void mte_set_gap(const struct maple_enode *mn,
-				 unsigned char gap, unsigned long val)
-{
-	switch (mte_node_type(mn)) {
-	default:
-		break;
-	case maple_arange_64:
-		mte_to_node(mn)->ma64.gap[gap] = val;
-		break;
-	}
-}
-
-/*
  * mas_ascend() - Walk up a level of the tree.
  * @mas: The maple state
  *
@@ -1152,79 +1068,24 @@ static int mas_ascend(struct ma_state *mas)
  *
  * Return: A pointer to a maple node.
  */
-static inline struct maple_node *mas_pop_node(struct ma_state *mas)
+static __always_inline struct maple_node *mas_pop_node(struct ma_state *mas)
 {
-	struct maple_alloc *ret, *node = mas->alloc;
-	unsigned long total = mas_allocated(mas);
-	unsigned int req = mas_alloc_req(mas);
-
-	/* nothing or a request pending. */
-	if (WARN_ON(!total))
-		return NULL;
+	struct maple_node *ret;
 
-	if (total == 1) {
-		/* single allocation in this ma_state */
+	if (mas->alloc) {
+		ret = mas->alloc;
 		mas->alloc = NULL;
-		ret = node;
-		goto single_node;
+		goto out;
 	}
 
-	if (node->node_count == 1) {
-		/* Single allocation in this node. */
-		mas->alloc = node->slot[0];
-		mas->alloc->total = node->total - 1;
-		ret = node;
-		goto new_head;
-	}
-	node->total--;
-	ret = node->slot[--node->node_count];
-	node->slot[node->node_count] = NULL;
+	if (WARN_ON_ONCE(!mas->sheaf))
+		return NULL;
 
-single_node:
-new_head:
-	if (req) {
-		req++;
-		mas_set_alloc_req(mas, req);
-	}
+	ret = kmem_cache_alloc_from_sheaf(maple_node_cache, GFP_NOWAIT, mas->sheaf);
 
+out:
 	memset(ret, 0, sizeof(*ret));
-	return (struct maple_node *)ret;
-}
-
-/*
- * mas_push_node() - Push a node back on the maple state allocation.
- * @mas: The maple state
- * @used: The used maple node
- *
- * Stores the maple node back into @mas->alloc for reuse.  Updates allocated and
- * requested node count as necessary.
- */
-static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
-{
-	struct maple_alloc *reuse = (struct maple_alloc *)used;
-	struct maple_alloc *head = mas->alloc;
-	unsigned long count;
-	unsigned int requested = mas_alloc_req(mas);
-
-	count = mas_allocated(mas);
-
-	reuse->request_count = 0;
-	reuse->node_count = 0;
-	if (count) {
-		if (head->node_count < MAPLE_ALLOC_SLOTS) {
-			head->slot[head->node_count++] = reuse;
-			head->total++;
-			goto done;
-		}
-		reuse->slot[0] = head;
-		reuse->node_count = 1;
-	}
-
-	reuse->total = count + 1;
-	mas->alloc = reuse;
-done:
-	if (requested > 1)
-		mas_set_alloc_req(mas, requested - 1);
+	return ret;
 }
 
 /*
@@ -1234,121 +1095,81 @@ done:
  */
 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
 {
-	struct maple_alloc *node;
-	unsigned long allocated = mas_allocated(mas);
-	unsigned int requested = mas_alloc_req(mas);
-	unsigned int count;
-	void **slots = NULL;
-	unsigned int max_req = 0;
-
-	if (!requested)
+	if (!mas->node_request)
 		return;
 
-	mas_set_alloc_req(mas, 0);
-	if (mas->mas_flags & MA_STATE_PREALLOC) {
-		if (allocated)
+	if (mas->node_request == 1) {
+		if (mas->sheaf)
+			goto use_sheaf;
+
+		if (mas->alloc)
 			return;
-		WARN_ON(!allocated);
-	}
 
-	if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
-		node = (struct maple_alloc *)mt_alloc_one(gfp);
-		if (!node)
-			goto nomem_one;
+		mas->alloc = mt_alloc_one(gfp);
+		if (!mas->alloc)
+			goto error;
 
-		if (allocated) {
-			node->slot[0] = mas->alloc;
-			node->node_count = 1;
-		} else {
-			node->node_count = 0;
-		}
+		mas->node_request = 0;
+		return;
+	}
 
-		mas->alloc = node;
-		node->total = ++allocated;
-		node->request_count = 0;
-		requested--;
+use_sheaf:
+	if (unlikely(mas->alloc)) {
+		kfree(mas->alloc);
+		mas->alloc = NULL;
 	}
 
-	node = mas->alloc;
-	while (requested) {
-		max_req = MAPLE_ALLOC_SLOTS - node->node_count;
-		slots = (void **)&node->slot[node->node_count];
-		max_req = min(requested, max_req);
-		count = mt_alloc_bulk(gfp, max_req, slots);
-		if (!count)
-			goto nomem_bulk;
+	if (mas->sheaf) {
+		unsigned long refill;
 
-		if (node->node_count == 0) {
-			node->slot[0]->node_count = 0;
-			node->slot[0]->request_count = 0;
+		refill = mas->node_request;
+		if (kmem_cache_sheaf_size(mas->sheaf) >= refill) {
+			mas->node_request = 0;
+			return;
 		}
 
-		node->node_count += count;
-		allocated += count;
-		/* find a non-full node*/
-		do {
-			node = node->slot[0];
-		} while (unlikely(node->node_count == MAPLE_ALLOC_SLOTS));
-		requested -= count;
-	}
-	mas->alloc->total = allocated;
-	return;
+		if (mt_refill_sheaf(gfp, &mas->sheaf, refill))
+			goto error;
 
-nomem_bulk:
-	/* Clean up potential freed allocations on bulk failure */
-	memset(slots, 0, max_req * sizeof(unsigned long));
-	mas->alloc->total = allocated;
-nomem_one:
-	mas_set_alloc_req(mas, requested);
-	mas_set_err(mas, -ENOMEM);
-}
+		mas->node_request = 0;
+		return;
+	}
 
-/*
- * mas_free() - Free an encoded maple node
- * @mas: The maple state
- * @used: The encoded maple node to free.
- *
- * Uses rcu free if necessary, pushes @used back on the maple state allocations
- * otherwise.
- */
-static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
-{
-	struct maple_node *tmp = mte_to_node(used);
+	mas->sheaf = mt_get_sheaf(gfp, mas->node_request);
+	if (likely(mas->sheaf)) {
+		mas->node_request = 0;
+		return;
+	}
 
-	if (mt_in_rcu(mas->tree))
-		ma_free_rcu(tmp);
-	else
-		mas_push_node(mas, tmp);
+error:
+	mas_set_err(mas, -ENOMEM);
 }
 
-/*
- * mas_node_count_gfp() - Check if enough nodes are allocated and request more
- * if there is not enough nodes.
- * @mas: The maple state
- * @count: The number of nodes needed
- * @gfp: the gfp flags
- */
-static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp)
+static inline void mas_empty_nodes(struct ma_state *mas)
 {
-	unsigned long allocated = mas_allocated(mas);
+	mas->node_request = 0;
+	if (mas->sheaf) {
+		mt_return_sheaf(mas->sheaf);
+		mas->sheaf = NULL;
+	}
 
-	if (allocated < count) {
-		mas_set_alloc_req(mas, count - allocated);
-		mas_alloc_nodes(mas, gfp);
+	if (mas->alloc) {
+		kfree(mas->alloc);
+		mas->alloc = NULL;
 	}
 }
 
 /*
- * mas_node_count() - Check if enough nodes are allocated and request more if
- * there is not enough nodes.
+ * mas_free() - Free an encoded maple node
  * @mas: The maple state
- * @count: The number of nodes needed
+ * @used: The encoded maple node to free.
  *
- * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags.
+ * Always frees the node through ma_free_rcu(); it is never pushed back onto
+ * the maple state allocations.
  */
-static void mas_node_count(struct ma_state *mas, int count)
+static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
 {
-	return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN);
+	ma_free_rcu(mte_to_node(used));
 }
 
 /*
@@ -1878,21 +1699,7 @@ static inline int mab_calc_split(struct ma_state *mas,
 	 * end on a NULL entry, with the exception of the left-most leaf.  The
 	 * limitation means that the split of a node must be checked for this condition
 	 * and be able to put more data in one direction or the other.
-	 */
-	if (unlikely((mas->mas_flags & MA_STATE_BULK))) {
-		*mid_split = 0;
-		split = b_end - mt_min_slots[bn->type];
-
-		if (!ma_is_leaf(bn->type))
-			return split;
-
-		mas->mas_flags |= MA_STATE_REBALANCE;
-		if (!bn->slot[split])
-			split--;
-		return split;
-	}
-
-	/*
+	 *
 	 * Although extremely rare, it is possible to enter what is known as the 3-way
 	 * split scenario.  The 3-way split comes about by means of a store of a range
 	 * that overwrites the end and beginning of two full nodes.  The result is a set
@@ -2040,27 +1847,6 @@ static inline void mab_mas_cp(struct maple_big_node *b_node,
 }
 
 /*
- * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert.
- * @mas: The maple state
- * @end: The maple node end
- * @mt: The maple node type
- */
-static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end,
-				      enum maple_type mt)
-{
-	if (!(mas->mas_flags & MA_STATE_BULK))
-		return;
-
-	if (mte_is_root(mas->node))
-		return;
-
-	if (end > mt_min_slots[mt]) {
-		mas->mas_flags &= ~MA_STATE_REBALANCE;
-		return;
-	}
-}
-
-/*
  * mas_store_b_node() - Store an @entry into the b_node while also copying the
  * data from a maple encoded node.
  * @wr_mas: the maple write state
@@ -2109,9 +1895,6 @@ static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas,
 	/* Handle new range ending before old range ends */
 	piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
 	if (piv > mas->last) {
-		if (piv == ULONG_MAX)
-			mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type);
-
 		if (offset_end != slot)
 			wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
 							  offset_end);
@@ -2523,10 +2306,7 @@ static inline void mas_topiary_node(struct ma_state *mas,
 	enode = tmp_mas->node;
 	tmp = mte_to_node(enode);
 	mte_set_node_dead(enode);
-	if (in_rcu)
-		ma_free_rcu(tmp);
-	else
-		mas_push_node(mas, tmp);
+	ma_free_rcu(tmp);
 }
 
 /*
@@ -3012,126 +2792,6 @@ static inline void mas_rebalance(struct ma_state *mas,
 }
 
 /*
- * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple
- * state.
- * @mas: The maple state
- * @end: The end of the left-most node.
- *
- * During a mass-insert event (such as forking), it may be necessary to
- * rebalance the left-most node when it is not sufficient.
- */
-static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end)
-{
-	enum maple_type mt = mte_node_type(mas->node);
-	struct maple_node reuse, *newnode, *parent, *new_left, *left, *node;
-	struct maple_enode *eparent, *old_eparent;
-	unsigned char offset, tmp, split = mt_slots[mt] / 2;
-	void __rcu **l_slots, **slots;
-	unsigned long *l_pivs, *pivs, gap;
-	bool in_rcu = mt_in_rcu(mas->tree);
-	unsigned char new_height = mas_mt_height(mas);
-
-	MA_STATE(l_mas, mas->tree, mas->index, mas->last);
-
-	l_mas = *mas;
-	mas_prev_sibling(&l_mas);
-
-	/* set up node. */
-	if (in_rcu) {
-		newnode = mas_pop_node(mas);
-	} else {
-		newnode = &reuse;
-	}
-
-	node = mas_mn(mas);
-	newnode->parent = node->parent;
-	slots = ma_slots(newnode, mt);
-	pivs = ma_pivots(newnode, mt);
-	left = mas_mn(&l_mas);
-	l_slots = ma_slots(left, mt);
-	l_pivs = ma_pivots(left, mt);
-	if (!l_slots[split])
-		split++;
-	tmp = mas_data_end(&l_mas) - split;
-
-	memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp);
-	memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp);
-	pivs[tmp] = l_mas.max;
-	memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end);
-	memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end);
-
-	l_mas.max = l_pivs[split];
-	mas->min = l_mas.max + 1;
-	old_eparent = mt_mk_node(mte_parent(l_mas.node),
-			     mas_parent_type(&l_mas, l_mas.node));
-	tmp += end;
-	if (!in_rcu) {
-		unsigned char max_p = mt_pivots[mt];
-		unsigned char max_s = mt_slots[mt];
-
-		if (tmp < max_p)
-			memset(pivs + tmp, 0,
-			       sizeof(unsigned long) * (max_p - tmp));
-
-		if (tmp < mt_slots[mt])
-			memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp));
-
-		memcpy(node, newnode, sizeof(struct maple_node));
-		ma_set_meta(node, mt, 0, tmp - 1);
-		mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node),
-			      l_pivs[split]);
-
-		/* Remove data from l_pivs. */
-		tmp = split + 1;
-		memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp));
-		memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp));
-		ma_set_meta(left, mt, 0, split);
-		eparent = old_eparent;
-
-		goto done;
-	}
-
-	/* RCU requires replacing both l_mas, mas, and parent. */
-	mas->node = mt_mk_node(newnode, mt);
-	ma_set_meta(newnode, mt, 0, tmp);
-
-	new_left = mas_pop_node(mas);
-	new_left->parent = left->parent;
-	mt = mte_node_type(l_mas.node);
-	slots = ma_slots(new_left, mt);
-	pivs = ma_pivots(new_left, mt);
-	memcpy(slots, l_slots, sizeof(void *) * split);
-	memcpy(pivs, l_pivs, sizeof(unsigned long) * split);
-	ma_set_meta(new_left, mt, 0, split);
-	l_mas.node = mt_mk_node(new_left, mt);
-
-	/* replace parent. */
-	offset = mte_parent_slot(mas->node);
-	mt = mas_parent_type(&l_mas, l_mas.node);
-	parent = mas_pop_node(mas);
-	slots = ma_slots(parent, mt);
-	pivs = ma_pivots(parent, mt);
-	memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node));
-	rcu_assign_pointer(slots[offset], mas->node);
-	rcu_assign_pointer(slots[offset - 1], l_mas.node);
-	pivs[offset - 1] = l_mas.max;
-	eparent = mt_mk_node(parent, mt);
-done:
-	gap = mas_leaf_max_gap(mas);
-	mte_set_gap(eparent, mte_parent_slot(mas->node), gap);
-	gap = mas_leaf_max_gap(&l_mas);
-	mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap);
-	mas_ascend(mas);
-
-	if (in_rcu) {
-		mas_replace_node(mas, old_eparent, new_height);
-		mas_adopt_children(mas, mas->node);
-	}
-
-	mas_update_gap(mas);
-}
-
-/*
  * mas_split_final_node() - Split the final node in a subtree operation.
  * @mast: the maple subtree state
  * @mas: The maple state
@@ -3837,8 +3497,6 @@ static inline void mas_wr_node_store(struct ma_wr_state *wr_mas,
 
 	if (mas->last == wr_mas->end_piv)
 		offset_end++; /* don't copy this offset */
-	else if (unlikely(wr_mas->r_max == ULONG_MAX))
-		mas_bulk_rebalance(mas, mas->end, wr_mas->type);
 
 	/* set up node. */
 	if (in_rcu) {
@@ -4174,7 +3832,7 @@ set_content:
  *
- * Return: Number of nodes required for preallocation.
+ * Stores the number of nodes required for preallocation in mas->node_request.
  */
-static inline int mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
+static inline void mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
 {
 	struct ma_state *mas = wr_mas->mas;
 	unsigned char height = mas_mt_height(mas);
@@ -4220,7 +3878,7 @@ static inline int mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
 		WARN_ON_ONCE(1);
 	}
 
-	return ret;
+	mas->node_request = ret;
 }
 
 /*
@@ -4255,7 +3913,7 @@ static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas)
 	new_end = mas_wr_new_end(wr_mas);
 	/* Potential spanning rebalance collapsing a node */
 	if (new_end < mt_min_slots[wr_mas->type]) {
-		if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK))
+		if (!mte_is_root(mas->node))
 			return  wr_rebalance;
 		return wr_node_store;
 	}
@@ -4281,15 +3939,15 @@ static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas)
  */
 static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry)
 {
-	int request;
+	struct ma_state *mas = wr_mas->mas;
 
 	mas_wr_prealloc_setup(wr_mas);
-	wr_mas->mas->store_type = mas_wr_store_type(wr_mas);
-	request = mas_prealloc_calc(wr_mas, entry);
-	if (!request)
+	mas->store_type = mas_wr_store_type(wr_mas);
+	mas_prealloc_calc(wr_mas, entry);
+	if (!mas->node_request)
 		return;
 
-	mas_node_count(wr_mas->mas, request);
+	mas_alloc_nodes(mas, GFP_NOWAIT);
 }
 
 /**
@@ -5281,7 +4939,7 @@ static void mt_free_walk(struct rcu_head *head)
 	mt_free_bulk(node->slot_len, slots);
 
 free_leaf:
-	mt_free_rcu(&node->rcu);
+	kfree(node);
 }
 
 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
@@ -5365,7 +5023,7 @@ next:
 
 free_leaf:
 	if (free)
-		mt_free_rcu(&node->rcu);
+		kfree(node);
 	else
 		mt_clear_meta(mt, node, node->type);
 }
@@ -5402,7 +5060,6 @@ static inline void mte_destroy_walk(struct maple_enode *enode,
  */
 void *mas_store(struct ma_state *mas, void *entry)
 {
-	int request;
 	MA_WR_STATE(wr_mas, mas, entry);
 
 	trace_ma_write(__func__, mas, 0, entry);
@@ -5432,11 +5089,11 @@ void *mas_store(struct ma_state *mas, void *entry)
 		return wr_mas.content;
 	}
 
-	request = mas_prealloc_calc(&wr_mas, entry);
-	if (!request)
+	mas_prealloc_calc(&wr_mas, entry);
+	if (!mas->node_request)
 		goto store;
 
-	mas_node_count(mas, request);
+	mas_alloc_nodes(mas, GFP_NOWAIT);
 	if (mas_is_err(mas))
 		return NULL;
 
@@ -5524,20 +5181,19 @@ EXPORT_SYMBOL_GPL(mas_store_prealloc);
 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
 {
 	MA_WR_STATE(wr_mas, mas, entry);
-	int ret = 0;
-	int request;
 
 	mas_wr_prealloc_setup(&wr_mas);
 	mas->store_type = mas_wr_store_type(&wr_mas);
-	request = mas_prealloc_calc(&wr_mas, entry);
-	if (!request)
+	mas_prealloc_calc(&wr_mas, entry);
+	if (!mas->node_request)
 		goto set_flag;
 
 	mas->mas_flags &= ~MA_STATE_PREALLOC;
-	mas_node_count_gfp(mas, request, gfp);
+	mas_alloc_nodes(mas, gfp);
 	if (mas_is_err(mas)) {
-		mas_set_alloc_req(mas, 0);
-		ret = xa_err(mas->node);
+		int ret = xa_err(mas->node);
+
+		mas->node_request = 0;
 		mas_destroy(mas);
 		mas_reset(mas);
 		return ret;
@@ -5545,7 +5201,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
 
 set_flag:
 	mas->mas_flags |= MA_STATE_PREALLOC;
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(mas_preallocate);
 
@@ -5559,109 +5215,11 @@ EXPORT_SYMBOL_GPL(mas_preallocate);
  */
 void mas_destroy(struct ma_state *mas)
 {
-	struct maple_alloc *node;
-	unsigned long total;
-
-	/*
-	 * When using mas_for_each() to insert an expected number of elements,
-	 * it is possible that the number inserted is less than the expected
-	 * number.  To fix an invalid final node, a check is performed here to
-	 * rebalance the previous node with the final node.
-	 */
-	if (mas->mas_flags & MA_STATE_REBALANCE) {
-		unsigned char end;
-		if (mas_is_err(mas))
-			mas_reset(mas);
-		mas_start(mas);
-		mtree_range_walk(mas);
-		end = mas->end + 1;
-		if (end < mt_min_slot_count(mas->node) - 1)
-			mas_destroy_rebalance(mas, end);
-
-		mas->mas_flags &= ~MA_STATE_REBALANCE;
-	}
-	mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
-
-	total = mas_allocated(mas);
-	while (total) {
-		node = mas->alloc;
-		mas->alloc = node->slot[0];
-		if (node->node_count > 1) {
-			size_t count = node->node_count - 1;
-
-			mt_free_bulk(count, (void __rcu **)&node->slot[1]);
-			total -= count;
-		}
-		mt_free_one(ma_mnode_ptr(node));
-		total--;
-	}
-
-	mas->alloc = NULL;
+	mas->mas_flags &= ~MA_STATE_PREALLOC;
+	mas_empty_nodes(mas);
 }
 EXPORT_SYMBOL_GPL(mas_destroy);
 
-/*
- * mas_expected_entries() - Set the expected number of entries that will be inserted.
- * @mas: The maple state
- * @nr_entries: The number of expected entries.
- *
- * This will attempt to pre-allocate enough nodes to store the expected number
- * of entries.  The allocations will occur using the bulk allocator interface
- * for speed.  Please call mas_destroy() on the @mas after inserting the entries
- * to ensure any unused nodes are freed.
- *
- * Return: 0 on success, -ENOMEM if memory could not be allocated.
- */
-int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries)
-{
-	int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2;
-	struct maple_enode *enode = mas->node;
-	int nr_nodes;
-	int ret;
-
-	/*
-	 * Sometimes it is necessary to duplicate a tree to a new tree, such as
-	 * forking a process and duplicating the VMAs from one tree to a new
-	 * tree.  When such a situation arises, it is known that the new tree is
-	 * not going to be used until the entire tree is populated.  For
-	 * performance reasons, it is best to use a bulk load with RCU disabled.
-	 * This allows for optimistic splitting that favours the left and reuse
-	 * of nodes during the operation.
-	 */
-
-	/* Optimize splitting for bulk insert in-order */
-	mas->mas_flags |= MA_STATE_BULK;
-
-	/*
-	 * Avoid overflow, assume a gap between each entry and a trailing null.
-	 * If this is wrong, it just means allocation can happen during
-	 * insertion of entries.
-	 */
-	nr_nodes = max(nr_entries, nr_entries * 2 + 1);
-	if (!mt_is_alloc(mas->tree))
-		nonleaf_cap = MAPLE_RANGE64_SLOTS - 2;
-
-	/* Leaves; reduce slots to keep space for expansion */
-	nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2);
-	/* Internal nodes */
-	nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap);
-	/* Add working room for split (2 nodes) + new parents */
-	mas_node_count_gfp(mas, nr_nodes + 3, GFP_KERNEL);
-
-	/* Detect if allocations run out */
-	mas->mas_flags |= MA_STATE_PREALLOC;
-
-	if (!mas_is_err(mas))
-		return 0;
-
-	ret = xa_err(mas->node);
-	mas->node = enode;
-	mas_destroy(mas);
-	return ret;
-
-}
-EXPORT_SYMBOL_GPL(mas_expected_entries);
-
 static void mas_may_activate(struct ma_state *mas)
 {
 	if (!mas->node) {
@@ -6293,7 +5851,7 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp)
 		mas_alloc_nodes(mas, gfp);
 	}
 
-	if (!mas_allocated(mas))
+	if (!mas->sheaf && !mas->alloc)
 		return false;
 
 	mas->status = ma_start;
@@ -6302,9 +5860,14 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp)
 
 void __init maple_tree_init(void)
 {
+	struct kmem_cache_args args = {
+		.align  = sizeof(struct maple_node),
+		.sheaf_capacity = 32,
+	};
+
 	maple_node_cache = kmem_cache_create("maple_node",
-			sizeof(struct maple_node), sizeof(struct maple_node),
-			SLAB_PANIC, NULL);
+			sizeof(struct maple_node), &args,
+			SLAB_PANIC);
 }
 
 /**
@@ -6637,7 +6200,7 @@ static void mas_dup_free(struct ma_state *mas)
 	}
 
 	node = mte_to_node(mas->node);
-	mt_free_one(node);
+	kfree(node);
 }
 
 /*
@@ -6678,7 +6241,7 @@ static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas,
 	struct maple_node *node = mte_to_node(mas->node);
 	struct maple_node *new_node = mte_to_node(new_mas->node);
 	enum maple_type type;
-	unsigned char request, count, i;
+	unsigned char count, i;
 	void __rcu **slots;
 	void __rcu **new_slots;
 	unsigned long val;
@@ -6686,20 +6249,17 @@ static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas,
 	/* Allocate memory for child nodes. */
 	type = mte_node_type(mas->node);
 	new_slots = ma_slots(new_node, type);
-	request = mas_data_end(mas) + 1;
-	count = mt_alloc_bulk(gfp, request, (void **)new_slots);
-	if (unlikely(count < request)) {
-		memset(new_slots, 0, request * sizeof(void *));
-		mas_set_err(mas, -ENOMEM);
+	count = mas->node_request = mas_data_end(mas) + 1;
+	mas_alloc_nodes(mas, gfp);
+	if (unlikely(mas_is_err(mas)))
 		return;
-	}
 
-	/* Restore node type information in slots. */
 	slots = ma_slots(node, type);
 	for (i = 0; i < count; i++) {
 		val = (unsigned long)mt_slot_locked(mas->tree, slots, i);
 		val &= MAPLE_NODE_MASK;
-		((unsigned long *)new_slots)[i] |= val;
+		new_slots[i] = ma_mnode_ptr((unsigned long)mas_pop_node(mas) |
+					    val);
 	}
 }
 
@@ -6753,7 +6313,7 @@ static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas,
 			/* Only allocate child nodes for non-leaf nodes. */
 			mas_dup_alloc(mas, new_mas, gfp);
 			if (unlikely(mas_is_err(mas)))
-				return;
+				goto empty_mas;
 		} else {
 			/*
 			 * This is the last leaf node and duplication is
@@ -6786,6 +6346,8 @@ set_new_tree:
 	/* Make them the same height */
 	new_mas->tree->ma_flags = mas->tree->ma_flags;
 	rcu_assign_pointer(new_mas->tree->ma_root, root);
+empty_mas:
+	mas_empty_nodes(mas);
 }
 
 /**
@@ -7683,8 +7245,9 @@ void mas_dump(const struct ma_state *mas)
 
 	pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end,
 	       mas->index, mas->last);
-	pr_err("     min=%lx max=%lx alloc=" PTR_FMT ", depth=%u, flags=%x\n",
-	       mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags);
+	pr_err("     min=%lx max=%lx sheaf=" PTR_FMT ", request %lu depth=%u, flags=%x\n",
+	       mas->min, mas->max, mas->sheaf, mas->node_request, mas->depth,
+	       mas->mas_flags);
 	if (mas->index > mas->last)
 		pr_err("Check index & last\n");
 }
diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c
index cb3936595b0d..14fbbee32046 100644
--- a/lib/test_maple_tree.c
+++ b/lib/test_maple_tree.c
@@ -2746,139 +2746,6 @@ static noinline void __init check_fuzzer(struct maple_tree *mt)
 	mtree_test_erase(mt, ULONG_MAX - 10);
 }
 
-/* duplicate the tree with a specific gap */
-static noinline void __init check_dup_gaps(struct maple_tree *mt,
-				    unsigned long nr_entries, bool zero_start,
-				    unsigned long gap)
-{
-	unsigned long i = 0;
-	struct maple_tree newmt;
-	int ret;
-	void *tmp;
-	MA_STATE(mas, mt, 0, 0);
-	MA_STATE(newmas, &newmt, 0, 0);
-	struct rw_semaphore newmt_lock;
-
-	init_rwsem(&newmt_lock);
-	mt_set_external_lock(&newmt, &newmt_lock);
-
-	if (!zero_start)
-		i = 1;
-
-	mt_zero_nr_tallocated();
-	for (; i <= nr_entries; i++)
-		mtree_store_range(mt, i*10, (i+1)*10 - gap,
-				  xa_mk_value(i), GFP_KERNEL);
-
-	mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN);
-	mt_set_non_kernel(99999);
-	down_write(&newmt_lock);
-	ret = mas_expected_entries(&newmas, nr_entries);
-	mt_set_non_kernel(0);
-	MT_BUG_ON(mt, ret != 0);
-
-	rcu_read_lock();
-	mas_for_each(&mas, tmp, ULONG_MAX) {
-		newmas.index = mas.index;
-		newmas.last = mas.last;
-		mas_store(&newmas, tmp);
-	}
-	rcu_read_unlock();
-	mas_destroy(&newmas);
-
-	__mt_destroy(&newmt);
-	up_write(&newmt_lock);
-}
-
-/* Duplicate many sizes of trees.  Mainly to test expected entry values */
-static noinline void __init check_dup(struct maple_tree *mt)
-{
-	int i;
-	int big_start = 100010;
-
-	/* Check with a value at zero */
-	for (i = 10; i < 1000; i++) {
-		mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
-		check_dup_gaps(mt, i, true, 5);
-		mtree_destroy(mt);
-		rcu_barrier();
-	}
-
-	cond_resched();
-	mt_cache_shrink();
-	/* Check with a value at zero, no gap */
-	for (i = 1000; i < 2000; i++) {
-		mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
-		check_dup_gaps(mt, i, true, 0);
-		mtree_destroy(mt);
-		rcu_barrier();
-	}
-
-	cond_resched();
-	mt_cache_shrink();
-	/* Check with a value at zero and unreasonably large */
-	for (i = big_start; i < big_start + 10; i++) {
-		mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
-		check_dup_gaps(mt, i, true, 5);
-		mtree_destroy(mt);
-		rcu_barrier();
-	}
-
-	cond_resched();
-	mt_cache_shrink();
-	/* Small to medium size not starting at zero*/
-	for (i = 200; i < 1000; i++) {
-		mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
-		check_dup_gaps(mt, i, false, 5);
-		mtree_destroy(mt);
-		rcu_barrier();
-	}
-
-	cond_resched();
-	mt_cache_shrink();
-	/* Unreasonably large not starting at zero*/
-	for (i = big_start; i < big_start + 10; i++) {
-		mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
-		check_dup_gaps(mt, i, false, 5);
-		mtree_destroy(mt);
-		rcu_barrier();
-		cond_resched();
-		mt_cache_shrink();
-	}
-
-	/* Check non-allocation tree not starting at zero */
-	for (i = 1500; i < 3000; i++) {
-		mt_init_flags(mt, 0);
-		check_dup_gaps(mt, i, false, 5);
-		mtree_destroy(mt);
-		rcu_barrier();
-		cond_resched();
-		if (i % 2 == 0)
-			mt_cache_shrink();
-	}
-
-	mt_cache_shrink();
-	/* Check non-allocation tree starting at zero */
-	for (i = 200; i < 1000; i++) {
-		mt_init_flags(mt, 0);
-		check_dup_gaps(mt, i, true, 5);
-		mtree_destroy(mt);
-		rcu_barrier();
-		cond_resched();
-	}
-
-	mt_cache_shrink();
-	/* Unreasonably large */
-	for (i = big_start + 5; i < big_start + 10; i++) {
-		mt_init_flags(mt, 0);
-		check_dup_gaps(mt, i, true, 5);
-		mtree_destroy(mt);
-		rcu_barrier();
-		mt_cache_shrink();
-		cond_resched();
-	}
-}
-
 static noinline void __init check_bnode_min_spanning(struct maple_tree *mt)
 {
 	int i = 50;
@@ -4078,10 +3945,6 @@ static int __init maple_tree_seed(void)
 	mtree_destroy(&tree);
 
 	mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
-	check_dup(&tree);
-	mtree_destroy(&tree);
-
-	mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
 	check_bnode_min_spanning(&tree);
 	mtree_destroy(&tree);