author    Linus Torvalds <torvalds@linux-foundation.org>  2026-02-11 15:57:08 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2026-02-11 15:57:08 -0800
commit    c22e26bd0906e9c8325462993f01adb16b8ea2c0 (patch)
tree      19085170f70859c43b5ee7fe742c35d25fcadd57 /security
parent    d0e91e401e31959154b6518c29d130b1973e3785 (diff)
parent    e265b330b93e3a3f9ff5256451d4f09b5f89b239 (diff)
Merge tag 'landlock-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux
Pull landlock updates from Mickaël Salaün:

 - extend Landlock to enforce restrictions on a whole process, similarly
   to seccomp's TSYNC flag

 - refactor data structures to simplify code and improve performance

 - add documentation to cover missing parts

* tag 'landlock-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux:
  mailmap: Add entry for Mickaël Salaün
  landlock: Transpose the layer masks data structure
  landlock: Add access_mask_subset() helper
  selftests/landlock: Add filesystem access benchmark
  landlock: Document audit blocker field format
  landlock: Add errata documentation section
  landlock: Add backwards compatibility for restrict flags
  landlock: Refactor TCP socket type check
  landlock: Minor reword of docs for TCP access rights
  landlock: Document LANDLOCK_RESTRICT_SELF_TSYNC
  selftests/landlock: Add LANDLOCK_RESTRICT_SELF_TSYNC tests
  landlock: Multithreading support for landlock_restrict_self()
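As an illustration of the headline feature (a hedged sketch, not part of this merge): with the LANDLOCK_RESTRICT_SELF_TSYNC flag added by the commits above, userspace can apply an already-prepared ruleset to every thread of the calling process at once. The wrapper function below is hypothetical; only the flag, the syscall, and prctl(PR_SET_NO_NEW_PRIVS) come from the series and the existing UAPI.

/*
 * Minimal sketch: enforce an already-prepared Landlock ruleset on all threads
 * of the current process, using the LANDLOCK_RESTRICT_SELF_TSYNC flag added by
 * this series. Error handling is reduced to the bare minimum.
 */
#include <linux/landlock.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int restrict_all_threads(const int ruleset_fd)
{
	/* Required unless the task has CAP_SYS_ADMIN in its user namespace. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return -1;

	/*
	 * With LANDLOCK_RESTRICT_SELF_TSYNC, the new domain is applied to all
	 * sibling threads as well, with all-or-nothing semantics.
	 */
	return syscall(__NR_landlock_restrict_self, ruleset_fd,
		       LANDLOCK_RESTRICT_SELF_TSYNC);
}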
Diffstat (limited to 'security')
-rw-r--r--  security/landlock/Makefile            11
-rw-r--r--  security/landlock/access.h            35
-rw-r--r--  security/landlock/audit.c             81
-rw-r--r--  security/landlock/audit.h              3
-rw-r--r--  security/landlock/cred.h              12
-rw-r--r--  security/landlock/domain.c            44
-rw-r--r--  security/landlock/domain.h             3
-rw-r--r--  security/landlock/errata/abi-1.h       8
-rw-r--r--  security/landlock/errata/abi-4.h       7
-rw-r--r--  security/landlock/errata/abi-6.h      10
-rw-r--r--  security/landlock/fs.c               352
-rw-r--r--  security/landlock/limits.h             2
-rw-r--r--  security/landlock/net.c               30
-rw-r--r--  security/landlock/ruleset.c           91
-rw-r--r--  security/landlock/ruleset.h            6
-rw-r--r--  security/landlock/syscalls.c          65
-rw-r--r--  security/landlock/tsync.c            561
-rw-r--r--  security/landlock/tsync.h             16
18 files changed, 960 insertions, 377 deletions
diff --git a/security/landlock/Makefile b/security/landlock/Makefile
index 3160c2bdac1d..ffa7646d99f3 100644
--- a/security/landlock/Makefile
+++ b/security/landlock/Makefile
@@ -1,7 +1,14 @@
obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
-landlock-y := setup.o syscalls.o object.o ruleset.o \
- cred.o task.o fs.o
+landlock-y := \
+ setup.o \
+ syscalls.o \
+ object.o \
+ ruleset.o \
+ cred.o \
+ task.o \
+ fs.o \
+ tsync.o
landlock-$(CONFIG_INET) += net.o
diff --git a/security/landlock/access.h b/security/landlock/access.h
index 7961c6630a2d..42c95747d7bd 100644
--- a/security/landlock/access.h
+++ b/security/landlock/access.h
@@ -61,14 +61,30 @@ union access_masks_all {
static_assert(sizeof(typeof_member(union access_masks_all, masks)) ==
sizeof(typeof_member(union access_masks_all, all)));
-typedef u16 layer_mask_t;
-
-/* Makes sure all layers can be checked. */
-static_assert(BITS_PER_TYPE(layer_mask_t) >= LANDLOCK_MAX_NUM_LAYERS);
+/**
+ * struct layer_access_masks - A boolean matrix of layers and access rights
+ *
+ * This has a bit for each combination of layer numbers and access rights.
+ * During access checks, it is used to represent the access rights for each
+ * layer which still need to be fulfilled. When all bits are 0, the access
+ * request is considered to be fulfilled.
+ */
+struct layer_access_masks {
+ /**
+ * @access: The unfulfilled access rights for each layer.
+ */
+ access_mask_t access[LANDLOCK_MAX_NUM_LAYERS];
+};
/*
- * Tracks domains responsible of a denied access. This is required to avoid
- * storing in each object the full layer_masks[] required by update_request().
+ * Tracks domains responsible for a denied access. This avoids storing in each
+ * object the full matrix of per-layer unfulfilled access rights, which is
+ * required by update_request().
+ *
+ * Each nibble holds the index of the youngest layer which denied a certain
+ * access right. For file system access rights, the upper nibble is the index
+ * of the layer which denies LANDLOCK_ACCESS_FS_IOCTL_DEV and the lower nibble
+ * is the index of the layer which denies LANDLOCK_ACCESS_FS_TRUNCATE.
*/
typedef u8 deny_masks_t;
@@ -97,4 +113,11 @@ landlock_upgrade_handled_access_masks(struct access_masks access_masks)
return access_masks;
}
+/* Checks the subset relation between access masks. */
+static inline bool access_mask_subset(access_mask_t subset,
+ access_mask_t superset)
+{
+ return (subset | superset) == superset;
+}
+
#endif /* _SECURITY_LANDLOCK_ACCESS_H */
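The deny_masks_t nibble layout documented above can be decoded as in the following illustrative sketch (not part of the patch; the helper names are hypothetical and a stand-alone typedef replaces the kernel's u8-based one):

/*
 * Illustrative sketch of the deny_masks_t nibble layout described above.
 */
typedef unsigned char deny_masks_t;

/* Index of the layer that denied LANDLOCK_ACCESS_FS_IOCTL_DEV (upper nibble). */
static inline unsigned char deny_layer_ioctl_dev(const deny_masks_t deny_masks)
{
	return (deny_masks >> 4) & 0x0f;
}

/* Index of the layer that denied LANDLOCK_ACCESS_FS_TRUNCATE (lower nibble). */
static inline unsigned char deny_layer_truncate(const deny_masks_t deny_masks)
{
	return deny_masks & 0x0f;
}

This matches the KUnit expectations further down: 0x1 encodes a truncate denial by layer 1, and 0x20 encodes an ioctl-dev denial by layer 2.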
diff --git a/security/landlock/audit.c b/security/landlock/audit.c
index e899995f1fd5..60ff217ab95b 100644
--- a/security/landlock/audit.c
+++ b/security/landlock/audit.c
@@ -180,38 +180,21 @@ static void test_get_hierarchy(struct kunit *const test)
#endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */
+/* Get the youngest layer that denied the access_request. */
static size_t get_denied_layer(const struct landlock_ruleset *const domain,
access_mask_t *const access_request,
- const layer_mask_t (*const layer_masks)[],
- const size_t layer_masks_size)
+ const struct layer_access_masks *masks)
{
- const unsigned long access_req = *access_request;
- unsigned long access_bit;
- access_mask_t missing = 0;
- long youngest_layer = -1;
-
- for_each_set_bit(access_bit, &access_req, layer_masks_size) {
- const layer_mask_t mask = (*layer_masks)[access_bit];
- long layer;
-
- if (!mask)
- continue;
-
- /* __fls(1) == 0 */
- layer = __fls(mask);
- if (layer > youngest_layer) {
- youngest_layer = layer;
- missing = BIT(access_bit);
- } else if (layer == youngest_layer) {
- missing |= BIT(access_bit);
+ for (ssize_t i = ARRAY_SIZE(masks->access) - 1; i >= 0; i--) {
+ if (masks->access[i] & *access_request) {
+ *access_request &= masks->access[i];
+ return i;
}
}
- *access_request = missing;
- if (youngest_layer == -1)
- return domain->num_layers - 1;
-
- return youngest_layer;
+ /* Not found - fall back to default values */
+ *access_request = 0;
+ return domain->num_layers - 1;
}
#ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST
@@ -221,50 +204,39 @@ static void test_get_denied_layer(struct kunit *const test)
const struct landlock_ruleset dom = {
.num_layers = 5,
};
- const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {
- [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT(0),
- [BIT_INDEX(LANDLOCK_ACCESS_FS_READ_FILE)] = BIT(1),
- [BIT_INDEX(LANDLOCK_ACCESS_FS_READ_DIR)] = BIT(1) | BIT(0),
- [BIT_INDEX(LANDLOCK_ACCESS_FS_REMOVE_DIR)] = BIT(2),
+ const struct layer_access_masks masks = {
+ .access[0] = LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_READ_DIR,
+ .access[1] = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR,
+ .access[2] = LANDLOCK_ACCESS_FS_REMOVE_DIR,
};
access_mask_t access;
access = LANDLOCK_ACCESS_FS_EXECUTE;
- KUNIT_EXPECT_EQ(test, 0,
- get_denied_layer(&dom, &access, &layer_masks,
- sizeof(layer_masks)));
+ KUNIT_EXPECT_EQ(test, 0, get_denied_layer(&dom, &access, &masks));
KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_EXECUTE);
access = LANDLOCK_ACCESS_FS_READ_FILE;
- KUNIT_EXPECT_EQ(test, 1,
- get_denied_layer(&dom, &access, &layer_masks,
- sizeof(layer_masks)));
+ KUNIT_EXPECT_EQ(test, 1, get_denied_layer(&dom, &access, &masks));
KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_READ_FILE);
access = LANDLOCK_ACCESS_FS_READ_DIR;
- KUNIT_EXPECT_EQ(test, 1,
- get_denied_layer(&dom, &access, &layer_masks,
- sizeof(layer_masks)));
+ KUNIT_EXPECT_EQ(test, 1, get_denied_layer(&dom, &access, &masks));
KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_READ_DIR);
access = LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_READ_DIR;
- KUNIT_EXPECT_EQ(test, 1,
- get_denied_layer(&dom, &access, &layer_masks,
- sizeof(layer_masks)));
+ KUNIT_EXPECT_EQ(test, 1, get_denied_layer(&dom, &access, &masks));
KUNIT_EXPECT_EQ(test, access,
LANDLOCK_ACCESS_FS_READ_FILE |
LANDLOCK_ACCESS_FS_READ_DIR);
access = LANDLOCK_ACCESS_FS_EXECUTE | LANDLOCK_ACCESS_FS_READ_DIR;
- KUNIT_EXPECT_EQ(test, 1,
- get_denied_layer(&dom, &access, &layer_masks,
- sizeof(layer_masks)));
+ KUNIT_EXPECT_EQ(test, 1, get_denied_layer(&dom, &access, &masks));
KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_READ_DIR);
access = LANDLOCK_ACCESS_FS_WRITE_FILE;
- KUNIT_EXPECT_EQ(test, 4,
- get_denied_layer(&dom, &access, &layer_masks,
- sizeof(layer_masks)));
+ KUNIT_EXPECT_EQ(test, 4, get_denied_layer(&dom, &access, &masks));
KUNIT_EXPECT_EQ(test, access, 0);
}
@@ -370,9 +342,6 @@ static bool is_valid_request(const struct landlock_request *const request)
return false;
}
- if (WARN_ON_ONCE(!!request->layer_masks ^ !!request->layer_masks_size))
- return false;
-
if (request->deny_masks) {
if (WARN_ON_ONCE(!request->all_existing_optional_access))
return false;
@@ -406,12 +375,12 @@ void landlock_log_denial(const struct landlock_cred_security *const subject,
if (missing) {
/* Gets the nearest domain that denies the request. */
if (request->layer_masks) {
- youngest_layer = get_denied_layer(
- subject->domain, &missing, request->layer_masks,
- request->layer_masks_size);
+ youngest_layer = get_denied_layer(subject->domain,
+ &missing,
+ request->layer_masks);
} else {
youngest_layer = get_layer_from_deny_masks(
- &missing, request->all_existing_optional_access,
+ &missing, _LANDLOCK_ACCESS_FS_OPTIONAL,
request->deny_masks);
}
youngest_denied =
diff --git a/security/landlock/audit.h b/security/landlock/audit.h
index 92428b7fc4d8..56778331b58c 100644
--- a/security/landlock/audit.h
+++ b/security/landlock/audit.h
@@ -43,8 +43,7 @@ struct landlock_request {
access_mask_t access;
/* Required fields for requests with layer masks. */
- const layer_mask_t (*layer_masks)[];
- size_t layer_masks_size;
+ const struct layer_access_masks *layer_masks;
/* Required fields for requests with deny masks. */
const access_mask_t all_existing_optional_access;
diff --git a/security/landlock/cred.h b/security/landlock/cred.h
index c82fe63ec598..c10a06727eb1 100644
--- a/security/landlock/cred.h
+++ b/security/landlock/cred.h
@@ -26,6 +26,8 @@
* This structure is packed to minimize the size of struct
* landlock_file_security. However, it is always aligned in the LSM cred blob,
* see lsm_set_blob_size().
+ *
+ * When updating this, also update landlock_cred_copy() if needed.
*/
struct landlock_cred_security {
/**
@@ -65,6 +67,16 @@ landlock_cred(const struct cred *cred)
return cred->security + landlock_blob_sizes.lbs_cred;
}
+static inline void landlock_cred_copy(struct landlock_cred_security *dst,
+ const struct landlock_cred_security *src)
+{
+ landlock_put_ruleset(dst->domain);
+
+ *dst = *src;
+
+ landlock_get_ruleset(src->domain);
+}
+
static inline struct landlock_ruleset *landlock_get_current_domain(void)
{
return landlock_cred(current_cred())->domain;
diff --git a/security/landlock/domain.c b/security/landlock/domain.c
index a647b68e8d06..79cb3bbdf4c5 100644
--- a/security/landlock/domain.c
+++ b/security/landlock/domain.c
@@ -182,32 +182,36 @@ static void test_get_layer_deny_mask(struct kunit *const test)
deny_masks_t
landlock_get_deny_masks(const access_mask_t all_existing_optional_access,
const access_mask_t optional_access,
- const layer_mask_t (*const layer_masks)[],
- const size_t layer_masks_size)
+ const struct layer_access_masks *const masks)
{
const unsigned long access_opt = optional_access;
unsigned long access_bit;
deny_masks_t deny_masks = 0;
+ access_mask_t all_denied = 0;
/* This may require change with new object types. */
- WARN_ON_ONCE(access_opt !=
- (optional_access & all_existing_optional_access));
+ WARN_ON_ONCE(!access_mask_subset(optional_access,
+ all_existing_optional_access));
- if (WARN_ON_ONCE(!layer_masks))
+ if (WARN_ON_ONCE(!masks))
return 0;
if (WARN_ON_ONCE(!access_opt))
return 0;
- for_each_set_bit(access_bit, &access_opt, layer_masks_size) {
- const layer_mask_t mask = (*layer_masks)[access_bit];
+ for (ssize_t i = ARRAY_SIZE(masks->access) - 1; i >= 0; i--) {
+ const access_mask_t denied = masks->access[i] & optional_access;
+ const unsigned long newly_denied = denied & ~all_denied;
- if (!mask)
+ if (!newly_denied)
continue;
- /* __fls(1) == 0 */
- deny_masks |= get_layer_deny_mask(all_existing_optional_access,
- access_bit, __fls(mask));
+ for_each_set_bit(access_bit, &newly_denied,
+ 8 * sizeof(access_mask_t)) {
+ deny_masks |= get_layer_deny_mask(
+ all_existing_optional_access, access_bit, i);
+ }
+ all_denied |= denied;
}
return deny_masks;
}
@@ -216,28 +220,28 @@ landlock_get_deny_masks(const access_mask_t all_existing_optional_access,
static void test_landlock_get_deny_masks(struct kunit *const test)
{
- const layer_mask_t layers1[BITS_PER_TYPE(access_mask_t)] = {
- [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0) |
- BIT_ULL(9),
- [BIT_INDEX(LANDLOCK_ACCESS_FS_TRUNCATE)] = BIT_ULL(1),
- [BIT_INDEX(LANDLOCK_ACCESS_FS_IOCTL_DEV)] = BIT_ULL(2) |
- BIT_ULL(0),
+ const struct layer_access_masks layers1 = {
+ .access[0] = LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ .access[1] = LANDLOCK_ACCESS_FS_TRUNCATE,
+ .access[2] = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ .access[9] = LANDLOCK_ACCESS_FS_EXECUTE,
};
KUNIT_EXPECT_EQ(test, 0x1,
landlock_get_deny_masks(_LANDLOCK_ACCESS_FS_OPTIONAL,
LANDLOCK_ACCESS_FS_TRUNCATE,
- &layers1, ARRAY_SIZE(layers1)));
+ &layers1));
KUNIT_EXPECT_EQ(test, 0x20,
landlock_get_deny_masks(_LANDLOCK_ACCESS_FS_OPTIONAL,
LANDLOCK_ACCESS_FS_IOCTL_DEV,
- &layers1, ARRAY_SIZE(layers1)));
+ &layers1));
KUNIT_EXPECT_EQ(
test, 0x21,
landlock_get_deny_masks(_LANDLOCK_ACCESS_FS_OPTIONAL,
LANDLOCK_ACCESS_FS_TRUNCATE |
LANDLOCK_ACCESS_FS_IOCTL_DEV,
- &layers1, ARRAY_SIZE(layers1)));
+ &layers1));
}
#endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */
diff --git a/security/landlock/domain.h b/security/landlock/domain.h
index 621f054c9a2b..a9d57db0120d 100644
--- a/security/landlock/domain.h
+++ b/security/landlock/domain.h
@@ -122,8 +122,7 @@ struct landlock_hierarchy {
deny_masks_t
landlock_get_deny_masks(const access_mask_t all_existing_optional_access,
const access_mask_t optional_access,
- const layer_mask_t (*const layer_masks)[],
- size_t layer_masks_size);
+ const struct layer_access_masks *const masks);
int landlock_init_hierarchy_log(struct landlock_hierarchy *const hierarchy);
diff --git a/security/landlock/errata/abi-1.h b/security/landlock/errata/abi-1.h
index e8a2bff2e5b6..3f099555f059 100644
--- a/security/landlock/errata/abi-1.h
+++ b/security/landlock/errata/abi-1.h
@@ -12,5 +12,13 @@
* hierarchy down to its filesystem root and those from the related mount point
* hierarchy. This prevents access right widening through rename or link
* actions.
+ *
+ * Impact:
+ *
+ * Without this fix, it was possible to widen access rights through rename or
+ * link actions involving disconnected directories, potentially bypassing
+ * ``LANDLOCK_ACCESS_FS_REFER`` restrictions. This could allow privilege
+ * escalation in complex mount scenarios where directories become disconnected
+ * from their original mount points.
*/
LANDLOCK_ERRATUM(3)
diff --git a/security/landlock/errata/abi-4.h b/security/landlock/errata/abi-4.h
index c052ee54f89f..fe11ec7d7ddf 100644
--- a/security/landlock/errata/abi-4.h
+++ b/security/landlock/errata/abi-4.h
@@ -11,5 +11,12 @@
* :manpage:`bind(2)` and :manpage:`connect(2)` operations. This change ensures
* that only TCP sockets are subject to TCP access rights, allowing other
* protocols to operate without unnecessary restrictions.
+ *
+ * Impact:
+ *
+ * In kernels without this fix, using ``LANDLOCK_ACCESS_NET_BIND_TCP`` or
+ * ``LANDLOCK_ACCESS_NET_CONNECT_TCP`` would incorrectly restrict non-TCP
+ * stream protocols (SMC, MPTCP, SCTP), potentially breaking applications
+ * that rely on these protocols while using Landlock network restrictions.
*/
LANDLOCK_ERRATUM(1)
diff --git a/security/landlock/errata/abi-6.h b/security/landlock/errata/abi-6.h
index 5113a829f87e..5cb1475c7ea8 100644
--- a/security/landlock/errata/abi-6.h
+++ b/security/landlock/errata/abi-6.h
@@ -15,5 +15,15 @@
* interaction between threads of the same process should always be allowed.
* This change ensures that any thread is allowed to send signals to any other
* thread within the same process, regardless of their domain.
+ *
+ * Impact:
+ *
+ * This problem only manifests when the userspace process is itself using
+ * :manpage:`libpsx(3)` or an equivalent mechanism to enforce a Landlock policy
+ * on multiple already-running threads at once. Programs which enforce a
+ * Landlock policy at startup time and only then become multithreaded are not
+ * affected. Without this fix, signal scoping could break multi-threaded
+ * applications that expect threads within the same process to freely signal
+ * each other.
*/
LANDLOCK_ERRATUM(2)
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 8205673c8b1c..e764470f588c 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -331,7 +331,7 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
/* Files only get access rights that make sense. */
if (!d_is_dir(path->dentry) &&
- (access_rights | ACCESS_FILE) != ACCESS_FILE)
+ !access_mask_subset(access_rights, ACCESS_FILE))
return -EINVAL;
if (WARN_ON_ONCE(ruleset->num_layers != 1))
return -EINVAL;
@@ -399,56 +399,54 @@ static const struct access_masks any_fs = {
};
/*
+ * Returns true iff the child file with the given src_child access rights under
+ * src_parent would result in having the same or fewer access rights if it were
+ * moved under new_parent.
+ */
+static bool may_refer(const struct layer_access_masks *const src_parent,
+ const struct layer_access_masks *const src_child,
+ const struct layer_access_masks *const new_parent,
+ const bool child_is_dir)
+{
+ for (size_t i = 0; i < ARRAY_SIZE(new_parent->access); i++) {
+ access_mask_t child_access = src_parent->access[i] &
+ src_child->access[i];
+ access_mask_t parent_access = new_parent->access[i];
+
+ if (!child_is_dir) {
+ child_access &= ACCESS_FILE;
+ parent_access &= ACCESS_FILE;
+ }
+
+ if (!access_mask_subset(child_access, parent_access))
+ return false;
+ }
+ return true;
+}
+
+/*
* Check that a destination file hierarchy has more restrictions than a source
* file hierarchy. This is only used for link and rename actions.
*
- * @layer_masks_child2: Optional child masks.
+ * Returns: true if child1 may be moved from parent1 to parent2 without
+ * increasing its access rights. If child2 is set, an additional condition is
+ * that child2 may be moved from parent2 to parent1 without increasing its
+ * access rights.
*/
-static bool no_more_access(
- const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
- const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS],
- const bool child1_is_directory,
- const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
- const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS],
- const bool child2_is_directory)
+static bool no_more_access(const struct layer_access_masks *const parent1,
+ const struct layer_access_masks *const child1,
+ const bool child1_is_dir,
+ const struct layer_access_masks *const parent2,
+ const struct layer_access_masks *const child2,
+ const bool child2_is_dir)
{
- unsigned long access_bit;
-
- for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2);
- access_bit++) {
- /* Ignores accesses that only make sense for directories. */
- const bool is_file_access =
- !!(BIT_ULL(access_bit) & ACCESS_FILE);
+ if (!may_refer(parent1, child1, parent2, child1_is_dir))
+ return false;
- if (child1_is_directory || is_file_access) {
- /*
- * Checks if the destination restrictions are a
- * superset of the source ones (i.e. inherited access
- * rights without child exceptions):
- * restrictions(parent2) >= restrictions(child1)
- */
- if ((((*layer_masks_parent1)[access_bit] &
- (*layer_masks_child1)[access_bit]) |
- (*layer_masks_parent2)[access_bit]) !=
- (*layer_masks_parent2)[access_bit])
- return false;
- }
+ if (!child2)
+ return true;
- if (!layer_masks_child2)
- continue;
- if (child2_is_directory || is_file_access) {
- /*
- * Checks inverted restrictions for RENAME_EXCHANGE:
- * restrictions(parent1) >= restrictions(child2)
- */
- if ((((*layer_masks_parent2)[access_bit] &
- (*layer_masks_child2)[access_bit]) |
- (*layer_masks_parent1)[access_bit]) !=
- (*layer_masks_parent1)[access_bit])
- return false;
- }
- }
- return true;
+ return may_refer(parent2, child2, parent1, child2_is_dir);
}
#define NMA_TRUE(...) KUNIT_EXPECT_TRUE(test, no_more_access(__VA_ARGS__))
@@ -458,25 +456,25 @@ static bool no_more_access(
static void test_no_more_access(struct kunit *const test)
{
- const layer_mask_t rx0[LANDLOCK_NUM_ACCESS_FS] = {
- [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0),
- [BIT_INDEX(LANDLOCK_ACCESS_FS_READ_FILE)] = BIT_ULL(0),
+ const struct layer_access_masks rx0 = {
+ .access[0] = LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_READ_FILE,
};
- const layer_mask_t mx0[LANDLOCK_NUM_ACCESS_FS] = {
- [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0),
- [BIT_INDEX(LANDLOCK_ACCESS_FS_MAKE_REG)] = BIT_ULL(0),
+ const struct layer_access_masks mx0 = {
+ .access[0] = LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
};
- const layer_mask_t x0[LANDLOCK_NUM_ACCESS_FS] = {
- [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0),
+ const struct layer_access_masks x0 = {
+ .access[0] = LANDLOCK_ACCESS_FS_EXECUTE,
};
- const layer_mask_t x1[LANDLOCK_NUM_ACCESS_FS] = {
- [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(1),
+ const struct layer_access_masks x1 = {
+ .access[1] = LANDLOCK_ACCESS_FS_EXECUTE,
};
- const layer_mask_t x01[LANDLOCK_NUM_ACCESS_FS] = {
- [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0) |
- BIT_ULL(1),
+ const struct layer_access_masks x01 = {
+ .access[0] = LANDLOCK_ACCESS_FS_EXECUTE,
+ .access[1] = LANDLOCK_ACCESS_FS_EXECUTE,
};
- const layer_mask_t allows_all[LANDLOCK_NUM_ACCESS_FS] = {};
+ const struct layer_access_masks allows_all = {};
/* Checks without restriction. */
NMA_TRUE(&x0, &allows_all, false, &allows_all, NULL, false);
@@ -564,31 +562,30 @@ static void test_no_more_access(struct kunit *const test)
#undef NMA_TRUE
#undef NMA_FALSE
-static bool is_layer_masks_allowed(
- layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
+static bool is_layer_masks_allowed(const struct layer_access_masks *masks)
{
- return !memchr_inv(layer_masks, 0, sizeof(*layer_masks));
+ return !memchr_inv(&masks->access, 0, sizeof(masks->access));
}
/*
- * Removes @layer_masks accesses that are not requested.
+ * Removes @masks accesses that are not requested.
*
* Returns true if the request is allowed, false otherwise.
*/
-static bool
-scope_to_request(const access_mask_t access_request,
- layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
+static bool scope_to_request(const access_mask_t access_request,
+ struct layer_access_masks *masks)
{
- const unsigned long access_req = access_request;
- unsigned long access_bit;
+ bool saw_unfulfilled_access = false;
- if (WARN_ON_ONCE(!layer_masks))
+ if (WARN_ON_ONCE(!masks))
return true;
- for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks))
- (*layer_masks)[access_bit] = 0;
-
- return is_layer_masks_allowed(layer_masks);
+ for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) {
+ masks->access[i] &= access_request;
+ if (masks->access[i])
+ saw_unfulfilled_access = true;
+ }
+ return !saw_unfulfilled_access;
}
#ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST
@@ -596,48 +593,41 @@ scope_to_request(const access_mask_t access_request,
static void test_scope_to_request_with_exec_none(struct kunit *const test)
{
/* Allows everything. */
- layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
+ struct layer_access_masks masks = {};
/* Checks and scopes with execute. */
- KUNIT_EXPECT_TRUE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE,
- &layer_masks));
- KUNIT_EXPECT_EQ(test, 0,
- layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]);
- KUNIT_EXPECT_EQ(test, 0,
- layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]);
+ KUNIT_EXPECT_TRUE(test,
+ scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, &masks));
+ KUNIT_EXPECT_EQ(test, 0, masks.access[0]);
}
static void test_scope_to_request_with_exec_some(struct kunit *const test)
{
/* Denies execute and write. */
- layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {
- [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0),
- [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1),
+ struct layer_access_masks masks = {
+ .access[0] = LANDLOCK_ACCESS_FS_EXECUTE,
+ .access[1] = LANDLOCK_ACCESS_FS_WRITE_FILE,
};
/* Checks and scopes with execute. */
KUNIT_EXPECT_FALSE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE,
- &layer_masks));
- KUNIT_EXPECT_EQ(test, BIT_ULL(0),
- layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]);
- KUNIT_EXPECT_EQ(test, 0,
- layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]);
+ &masks));
+ KUNIT_EXPECT_EQ(test, LANDLOCK_ACCESS_FS_EXECUTE, masks.access[0]);
+ KUNIT_EXPECT_EQ(test, 0, masks.access[1]);
}
static void test_scope_to_request_without_access(struct kunit *const test)
{
/* Denies execute and write. */
- layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {
- [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0),
- [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1),
+ struct layer_access_masks masks = {
+ .access[0] = LANDLOCK_ACCESS_FS_EXECUTE,
+ .access[1] = LANDLOCK_ACCESS_FS_WRITE_FILE,
};
/* Checks and scopes without access request. */
- KUNIT_EXPECT_TRUE(test, scope_to_request(0, &layer_masks));
- KUNIT_EXPECT_EQ(test, 0,
- layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]);
- KUNIT_EXPECT_EQ(test, 0,
- layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]);
+ KUNIT_EXPECT_TRUE(test, scope_to_request(0, &masks));
+ KUNIT_EXPECT_EQ(test, 0, masks.access[0]);
+ KUNIT_EXPECT_EQ(test, 0, masks.access[1]);
}
#endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */
@@ -646,20 +636,16 @@ static void test_scope_to_request_without_access(struct kunit *const test)
* Returns true if there is at least one access right different than
* LANDLOCK_ACCESS_FS_REFER.
*/
-static bool
-is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS],
- const access_mask_t access_request)
+static bool is_eacces(const struct layer_access_masks *masks,
+ const access_mask_t access_request)
{
- unsigned long access_bit;
- /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */
- const unsigned long access_check = access_request &
- ~LANDLOCK_ACCESS_FS_REFER;
-
- if (!layer_masks)
+ if (!masks)
return false;
- for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) {
- if ((*layer_masks)[access_bit])
+ for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) {
+ /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */
+ if (masks->access[i] & access_request &
+ ~LANDLOCK_ACCESS_FS_REFER)
return true;
}
return false;
@@ -672,37 +658,37 @@ is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS],
static void test_is_eacces_with_none(struct kunit *const test)
{
- const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
+ const struct layer_access_masks masks = {};
- IE_FALSE(&layer_masks, 0);
- IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER);
- IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE);
- IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE);
+ IE_FALSE(&masks, 0);
+ IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER);
+ IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE);
+ IE_FALSE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE);
}
static void test_is_eacces_with_refer(struct kunit *const test)
{
- const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {
- [BIT_INDEX(LANDLOCK_ACCESS_FS_REFER)] = BIT_ULL(0),
+ const struct layer_access_masks masks = {
+ .access[0] = LANDLOCK_ACCESS_FS_REFER,
};
- IE_FALSE(&layer_masks, 0);
- IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER);
- IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE);
- IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE);
+ IE_FALSE(&masks, 0);
+ IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER);
+ IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE);
+ IE_FALSE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE);
}
static void test_is_eacces_with_write(struct kunit *const test)
{
- const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {
- [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(0),
+ const struct layer_access_masks masks = {
+ .access[0] = LANDLOCK_ACCESS_FS_WRITE_FILE,
};
- IE_FALSE(&layer_masks, 0);
- IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER);
- IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE);
+ IE_FALSE(&masks, 0);
+ IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER);
+ IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE);
- IE_TRUE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE);
+ IE_TRUE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE);
}
#endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */
@@ -752,26 +738,25 @@ static void test_is_eacces_with_write(struct kunit *const test)
* - true if the access request is granted;
* - false otherwise.
*/
-static bool is_access_to_paths_allowed(
- const struct landlock_ruleset *const domain,
- const struct path *const path,
- const access_mask_t access_request_parent1,
- layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
- struct landlock_request *const log_request_parent1,
- struct dentry *const dentry_child1,
- const access_mask_t access_request_parent2,
- layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
- struct landlock_request *const log_request_parent2,
- struct dentry *const dentry_child2)
+static bool
+is_access_to_paths_allowed(const struct landlock_ruleset *const domain,
+ const struct path *const path,
+ const access_mask_t access_request_parent1,
+ struct layer_access_masks *layer_masks_parent1,
+ struct landlock_request *const log_request_parent1,
+ struct dentry *const dentry_child1,
+ const access_mask_t access_request_parent2,
+ struct layer_access_masks *layer_masks_parent2,
+ struct landlock_request *const log_request_parent2,
+ struct dentry *const dentry_child2)
{
bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check,
child1_is_directory = true, child2_is_directory = true;
struct path walker_path;
access_mask_t access_masked_parent1, access_masked_parent2;
- layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS],
- _layer_masks_child2[LANDLOCK_NUM_ACCESS_FS];
- layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL,
- (*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL;
+ struct layer_access_masks _layer_masks_child1, _layer_masks_child2;
+ struct layer_access_masks *layer_masks_child1 = NULL,
+ *layer_masks_child2 = NULL;
if (!access_request_parent1 && !access_request_parent2)
return true;
@@ -811,22 +796,20 @@ static bool is_access_to_paths_allowed(
}
if (unlikely(dentry_child1)) {
- landlock_unmask_layers(
- find_rule(domain, dentry_child1),
- landlock_init_layer_masks(
- domain, LANDLOCK_MASK_ACCESS_FS,
- &_layer_masks_child1, LANDLOCK_KEY_INODE),
- &_layer_masks_child1, ARRAY_SIZE(_layer_masks_child1));
+ if (landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+ &_layer_masks_child1,
+ LANDLOCK_KEY_INODE))
+ landlock_unmask_layers(find_rule(domain, dentry_child1),
+ &_layer_masks_child1);
layer_masks_child1 = &_layer_masks_child1;
child1_is_directory = d_is_dir(dentry_child1);
}
if (unlikely(dentry_child2)) {
- landlock_unmask_layers(
- find_rule(domain, dentry_child2),
- landlock_init_layer_masks(
- domain, LANDLOCK_MASK_ACCESS_FS,
- &_layer_masks_child2, LANDLOCK_KEY_INODE),
- &_layer_masks_child2, ARRAY_SIZE(_layer_masks_child2));
+ if (landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+ &_layer_masks_child2,
+ LANDLOCK_KEY_INODE))
+ landlock_unmask_layers(find_rule(domain, dentry_child2),
+ &_layer_masks_child2);
layer_masks_child2 = &_layer_masks_child2;
child2_is_directory = d_is_dir(dentry_child2);
}
@@ -881,16 +864,12 @@ static bool is_access_to_paths_allowed(
}
rule = find_rule(domain, walker_path.dentry);
- allowed_parent1 = allowed_parent1 ||
- landlock_unmask_layers(
- rule, access_masked_parent1,
- layer_masks_parent1,
- ARRAY_SIZE(*layer_masks_parent1));
- allowed_parent2 = allowed_parent2 ||
- landlock_unmask_layers(
- rule, access_masked_parent2,
- layer_masks_parent2,
- ARRAY_SIZE(*layer_masks_parent2));
+ allowed_parent1 =
+ allowed_parent1 ||
+ landlock_unmask_layers(rule, layer_masks_parent1);
+ allowed_parent2 =
+ allowed_parent2 ||
+ landlock_unmask_layers(rule, layer_masks_parent2);
/* Stops when a rule from each layer grants access. */
if (allowed_parent1 && allowed_parent2)
@@ -950,8 +929,6 @@ jump_up:
log_request_parent1->audit.u.path = *path;
log_request_parent1->access = access_masked_parent1;
log_request_parent1->layer_masks = layer_masks_parent1;
- log_request_parent1->layer_masks_size =
- ARRAY_SIZE(*layer_masks_parent1);
}
if (!allowed_parent2 && log_request_parent2) {
@@ -960,8 +937,6 @@ jump_up:
log_request_parent2->audit.u.path = *path;
log_request_parent2->access = access_masked_parent2;
log_request_parent2->layer_masks = layer_masks_parent2;
- log_request_parent2->layer_masks_size =
- ARRAY_SIZE(*layer_masks_parent2);
}
#endif /* CONFIG_AUDIT */
@@ -976,7 +951,7 @@ static int current_check_access_path(const struct path *const path,
};
const struct landlock_cred_security *const subject =
landlock_get_applicable_subject(current_cred(), masks, NULL);
- layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
+ struct layer_access_masks layer_masks;
struct landlock_request request = {};
if (!subject)
@@ -1051,12 +1026,11 @@ static access_mask_t maybe_remove(const struct dentry *const dentry)
* - true if all the domain access rights are allowed for @dir;
* - false if the walk reached @mnt_root.
*/
-static bool collect_domain_accesses(
- const struct landlock_ruleset *const domain,
- const struct dentry *const mnt_root, struct dentry *dir,
- layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS])
+static bool collect_domain_accesses(const struct landlock_ruleset *const domain,
+ const struct dentry *const mnt_root,
+ struct dentry *dir,
+ struct layer_access_masks *layer_masks_dom)
{
- unsigned long access_dom;
bool ret = false;
if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom))
@@ -1064,18 +1038,17 @@ static bool collect_domain_accesses(
if (is_nouser_or_private(dir))
return true;
- access_dom = landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
- layer_masks_dom,
- LANDLOCK_KEY_INODE);
+ if (!landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+ layer_masks_dom, LANDLOCK_KEY_INODE))
+ return true;
dget(dir);
while (true) {
struct dentry *parent_dentry;
/* Gets all layers allowing all domain accesses. */
- if (landlock_unmask_layers(find_rule(domain, dir), access_dom,
- layer_masks_dom,
- ARRAY_SIZE(*layer_masks_dom))) {
+ if (landlock_unmask_layers(find_rule(domain, dir),
+ layer_masks_dom)) {
/*
* Stops when all handled accesses are allowed by at
* least one rule in each layer.
@@ -1163,8 +1136,8 @@ static int current_check_refer_path(struct dentry *const old_dentry,
access_mask_t access_request_parent1, access_request_parent2;
struct path mnt_dir;
struct dentry *old_parent;
- layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {},
- layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {};
+ struct layer_access_masks layer_masks_parent1 = {},
+ layer_masks_parent2 = {};
struct landlock_request request1 = {}, request2 = {};
if (!subject)
@@ -1640,7 +1613,7 @@ static bool is_device(const struct file *const file)
static int hook_file_open(struct file *const file)
{
- layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
+ struct layer_access_masks layer_masks = {};
access_mask_t open_access_request, full_access_request, allowed_access,
optional_access;
const struct landlock_cred_security *const subject =
@@ -1675,20 +1648,14 @@ static int hook_file_open(struct file *const file)
&layer_masks, &request, NULL, 0, NULL, NULL, NULL)) {
allowed_access = full_access_request;
} else {
- unsigned long access_bit;
- const unsigned long access_req = full_access_request;
-
/*
* Calculate the actual allowed access rights from layer_masks.
- * Add each access right to allowed_access which has not been
- * vetoed by any layer.
+ * Remove from the full access request the access rights which are
+ * still unfulfilled in any of the layers.
*/
- allowed_access = 0;
- for_each_set_bit(access_bit, &access_req,
- ARRAY_SIZE(layer_masks)) {
- if (!layer_masks[access_bit])
- allowed_access |= BIT_ULL(access_bit);
- }
+ allowed_access = full_access_request;
+ for (size_t i = 0; i < ARRAY_SIZE(layer_masks.access); i++)
+ allowed_access &= ~layer_masks.access[i];
}
/*
@@ -1700,11 +1667,10 @@ static int hook_file_open(struct file *const file)
landlock_file(file)->allowed_access = allowed_access;
#ifdef CONFIG_AUDIT
landlock_file(file)->deny_masks = landlock_get_deny_masks(
- _LANDLOCK_ACCESS_FS_OPTIONAL, optional_access, &layer_masks,
- ARRAY_SIZE(layer_masks));
+ _LANDLOCK_ACCESS_FS_OPTIONAL, optional_access, &layer_masks);
#endif /* CONFIG_AUDIT */
- if ((open_access_request & allowed_access) == open_access_request)
+ if (access_mask_subset(open_access_request, allowed_access))
return 0;
/* Sets access to reflect the actual request. */
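To make the allowed_access computation in hook_file_open() above concrete, here is a hedged, stand-alone example with made-up masks (names and values are hypothetical; plain integers stand in for the kernel types):

/*
 * Stand-alone illustration of the allowed_access loop in hook_file_open():
 * every access right still left unfulfilled by some layer is removed from the
 * full request.
 */
#include <stdint.h>
#include <stdio.h>

#define ACCESS_EXECUTE    (1U << 0)
#define ACCESS_WRITE_FILE (1U << 1)
#define ACCESS_READ_FILE  (1U << 2)

int main(void)
{
	/* Unfulfilled (denied) rights left per layer after the path walk. */
	const uint16_t layer_access[2] = {
		ACCESS_EXECUTE,		/* layer 0 still denies execute */
		ACCESS_WRITE_FILE,	/* layer 1 still denies write */
	};
	uint16_t allowed = ACCESS_EXECUTE | ACCESS_WRITE_FILE |
			   ACCESS_READ_FILE;

	for (size_t i = 0; i < 2; i++)
		allowed &= ~layer_access[i];

	/* Only ACCESS_READ_FILE (0x4) survives. */
	printf("allowed=0x%x\n", allowed);
	return 0;
}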
diff --git a/security/landlock/limits.h b/security/landlock/limits.h
index 65b5ff051674..eb584f47288d 100644
--- a/security/landlock/limits.h
+++ b/security/landlock/limits.h
@@ -31,7 +31,7 @@
#define LANDLOCK_MASK_SCOPE ((LANDLOCK_LAST_SCOPE << 1) - 1)
#define LANDLOCK_NUM_SCOPE __const_hweight64(LANDLOCK_MASK_SCOPE)
-#define LANDLOCK_LAST_RESTRICT_SELF LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF
+#define LANDLOCK_LAST_RESTRICT_SELF LANDLOCK_RESTRICT_SELF_TSYNC
#define LANDLOCK_MASK_RESTRICT_SELF ((LANDLOCK_LAST_RESTRICT_SELF << 1) - 1)
/* clang-format on */
diff --git a/security/landlock/net.c b/security/landlock/net.c
index e6367e30e5b0..c368649985c5 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -47,7 +47,7 @@ static int current_check_access_socket(struct socket *const sock,
access_mask_t access_request)
{
__be16 port;
- layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_NET] = {};
+ struct layer_access_masks layer_masks = {};
const struct landlock_rule *rule;
struct landlock_id id = {
.type = LANDLOCK_KEY_NET_PORT,
@@ -62,9 +62,6 @@ static int current_check_access_socket(struct socket *const sock,
if (!subject)
return 0;
- if (!sk_is_tcp(sock->sk))
- return 0;
-
/* Checks for minimal header length to safely read sa_family. */
if (addrlen < offsetofend(typeof(*address), sa_family))
return -EINVAL;
@@ -194,8 +191,10 @@ static int current_check_access_socket(struct socket *const sock,
access_request = landlock_init_layer_masks(subject->domain,
access_request, &layer_masks,
LANDLOCK_KEY_NET_PORT);
- if (landlock_unmask_layers(rule, access_request, &layer_masks,
- ARRAY_SIZE(layer_masks)))
+ if (!access_request)
+ return 0;
+
+ if (landlock_unmask_layers(rule, &layer_masks))
return 0;
audit_net.family = address->sa_family;
@@ -206,7 +205,6 @@ static int current_check_access_socket(struct socket *const sock,
.audit.u.net = &audit_net,
.access = access_request,
.layer_masks = &layer_masks,
- .layer_masks_size = ARRAY_SIZE(layer_masks),
});
return -EACCES;
}
@@ -214,16 +212,30 @@ static int current_check_access_socket(struct socket *const sock,
static int hook_socket_bind(struct socket *const sock,
struct sockaddr *const address, const int addrlen)
{
+ access_mask_t access_request;
+
+ if (sk_is_tcp(sock->sk))
+ access_request = LANDLOCK_ACCESS_NET_BIND_TCP;
+ else
+ return 0;
+
return current_check_access_socket(sock, address, addrlen,
- LANDLOCK_ACCESS_NET_BIND_TCP);
+ access_request);
}
static int hook_socket_connect(struct socket *const sock,
struct sockaddr *const address,
const int addrlen)
{
+ access_mask_t access_request;
+
+ if (sk_is_tcp(sock->sk))
+ access_request = LANDLOCK_ACCESS_NET_CONNECT_TCP;
+ else
+ return 0;
+
return current_check_access_socket(sock, address, addrlen,
- LANDLOCK_ACCESS_NET_CONNECT_TCP);
+ access_request);
}
static struct security_hook_list landlock_hooks[] __ro_after_init = {
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index 0a5b0c76b3f7..419b237de635 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -612,22 +612,24 @@ landlock_find_rule(const struct landlock_ruleset *const ruleset,
return NULL;
}
-/*
- * @layer_masks is read and may be updated according to the access request and
- * the matching rule.
- * @masks_array_size must be equal to ARRAY_SIZE(*layer_masks).
+/**
+ * landlock_unmask_layers - Remove the access rights in @masks
+ * which are granted in @rule
+ *
+ * Updates the set of (per-layer) unfulfilled access rights @masks
+ * so that all the access rights granted in @rule are removed from it
+ * (because they are now fulfilled).
+ *
+ * @rule: A rule that grants a set of access rights for each layer
+ * @masks: A matrix of unfulfilled access rights for each layer
*
- * Returns true if the request is allowed (i.e. relevant layer masks for the
- * request are empty).
+ * Returns true if the request is allowed (i.e. the granted access rights cover
+ * all remaining unfulfilled access rights and @masks has no leftover set bits).
*/
bool landlock_unmask_layers(const struct landlock_rule *const rule,
- const access_mask_t access_request,
- layer_mask_t (*const layer_masks)[],
- const size_t masks_array_size)
+ struct layer_access_masks *masks)
{
- size_t layer_level;
-
- if (!access_request || !layer_masks)
+ if (!masks)
return true;
if (!rule)
return false;
@@ -642,28 +644,18 @@ bool landlock_unmask_layers(const struct landlock_rule *const rule,
* by only one rule, but by the union (binary OR) of multiple rules.
* E.g. /a/b <execute> + /a <read> => /a/b <execute + read>
*/
- for (layer_level = 0; layer_level < rule->num_layers; layer_level++) {
- const struct landlock_layer *const layer =
- &rule->layers[layer_level];
- const layer_mask_t layer_bit = BIT_ULL(layer->level - 1);
- const unsigned long access_req = access_request;
- unsigned long access_bit;
- bool is_empty;
+ for (size_t i = 0; i < rule->num_layers; i++) {
+ const struct landlock_layer *const layer = &rule->layers[i];
- /*
- * Records in @layer_masks which layer grants access to each requested
- * access: bit cleared if the related layer grants access.
- */
- is_empty = true;
- for_each_set_bit(access_bit, &access_req, masks_array_size) {
- if (layer->access & BIT_ULL(access_bit))
- (*layer_masks)[access_bit] &= ~layer_bit;
- is_empty = is_empty && !(*layer_masks)[access_bit];
- }
- if (is_empty)
- return true;
+ /* Clear the bits where the layer in the rule grants access. */
+ masks->access[layer->level - 1] &= ~layer->access;
+ }
+
+ for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) {
+ if (masks->access[i])
+ return false;
}
- return false;
+ return true;
}
typedef access_mask_t
@@ -673,13 +665,12 @@ get_access_mask_t(const struct landlock_ruleset *const ruleset,
/**
* landlock_init_layer_masks - Initialize layer masks from an access request
*
- * Populates @layer_masks such that for each access right in @access_request,
+ * Populates @masks such that for each access right in @access_request,
* the bits for all the layers are set where this access right is handled.
*
* @domain: The domain that defines the current restrictions.
* @access_request: The requested access rights to check.
- * @layer_masks: It must contain %LANDLOCK_NUM_ACCESS_FS or
- * %LANDLOCK_NUM_ACCESS_NET elements according to @key_type.
+ * @masks: Layer access masks to populate.
* @key_type: The key type to switch between access masks of different types.
*
* Returns: An access mask where each access right bit is set which is handled
@@ -688,23 +679,20 @@ get_access_mask_t(const struct landlock_ruleset *const ruleset,
access_mask_t
landlock_init_layer_masks(const struct landlock_ruleset *const domain,
const access_mask_t access_request,
- layer_mask_t (*const layer_masks)[],
+ struct layer_access_masks *const masks,
const enum landlock_key_type key_type)
{
access_mask_t handled_accesses = 0;
- size_t layer_level, num_access;
get_access_mask_t *get_access_mask;
switch (key_type) {
case LANDLOCK_KEY_INODE:
get_access_mask = landlock_get_fs_access_mask;
- num_access = LANDLOCK_NUM_ACCESS_FS;
break;
#if IS_ENABLED(CONFIG_INET)
case LANDLOCK_KEY_NET_PORT:
get_access_mask = landlock_get_net_access_mask;
- num_access = LANDLOCK_NUM_ACCESS_NET;
break;
#endif /* IS_ENABLED(CONFIG_INET) */
@@ -713,27 +701,18 @@ landlock_init_layer_masks(const struct landlock_ruleset *const domain,
return 0;
}
- memset(layer_masks, 0,
- array_size(sizeof((*layer_masks)[0]), num_access));
-
/* An empty access request can happen because of O_WRONLY | O_RDWR. */
if (!access_request)
return 0;
- /* Saves all handled accesses per layer. */
- for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
- const unsigned long access_req = access_request;
- const access_mask_t access_mask =
- get_access_mask(domain, layer_level);
- unsigned long access_bit;
-
- for_each_set_bit(access_bit, &access_req, num_access) {
- if (BIT_ULL(access_bit) & access_mask) {
- (*layer_masks)[access_bit] |=
- BIT_ULL(layer_level);
- handled_accesses |= BIT_ULL(access_bit);
- }
- }
+ for (size_t i = 0; i < domain->num_layers; i++) {
+ const access_mask_t handled = get_access_mask(domain, i);
+
+ masks->access[i] = access_request & handled;
+ handled_accesses |= masks->access[i];
}
+ for (size_t i = domain->num_layers; i < ARRAY_SIZE(masks->access); i++)
+ masks->access[i] = 0;
+
return handled_accesses;
}
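Taken together, landlock_init_layer_masks() and landlock_unmask_layers() implement a simple pattern: seed each layer's mask with the handled part of the request, clear bits as matching rules grant them, and allow once everything is cleared. A reduced, stand-alone sketch of that pattern follows (hypothetical names, plain arrays instead of the kernel types, and a simplified per-layer indexing):

/*
 * Reduced sketch of the init/unmask pattern used above. Not kernel code.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define MAX_LAYERS 16

struct masks {
	uint16_t access[MAX_LAYERS];
};

/* Seed each layer with the requested rights that this layer handles. */
static void init_masks(struct masks *const m, const uint16_t request,
		       const uint16_t *const handled, const size_t num_layers)
{
	for (size_t i = 0; i < MAX_LAYERS; i++)
		m->access[i] = (i < num_layers) ? (request & handled[i]) : 0;
}

/* Clear the rights granted per layer; return true once nothing is left. */
static bool unmask(struct masks *const m, const uint16_t *const granted,
		   const size_t num_layers)
{
	bool allowed = true;

	for (size_t i = 0; i < num_layers; i++)
		m->access[i] &= ~granted[i];
	for (size_t i = 0; i < MAX_LAYERS; i++)
		allowed = allowed && !m->access[i];
	return allowed;
}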
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index 1a78cba662b2..9d6dc632684c 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -302,14 +302,12 @@ landlock_get_scope_mask(const struct landlock_ruleset *const ruleset,
}
bool landlock_unmask_layers(const struct landlock_rule *const rule,
- const access_mask_t access_request,
- layer_mask_t (*const layer_masks)[],
- const size_t masks_array_size);
+ struct layer_access_masks *masks);
access_mask_t
landlock_init_layer_masks(const struct landlock_ruleset *const domain,
const access_mask_t access_request,
- layer_mask_t (*const layer_masks)[],
+ struct layer_access_masks *masks,
const enum landlock_key_type key_type);
#endif /* _SECURITY_LANDLOCK_RULESET_H */
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index 0116e9f93ffe..0d66a68677b7 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -36,6 +36,7 @@
#include "net.h"
#include "ruleset.h"
#include "setup.h"
+#include "tsync.h"
static bool is_initialized(void)
{
@@ -157,11 +158,13 @@ static const struct file_operations ruleset_fops = {
/*
* The Landlock ABI version should be incremented for each new Landlock-related
* user space visible change (e.g. Landlock syscalls). This version should
- * only be incremented once per Linux release, and the date in
+ * only be incremented once per Linux release. When incrementing, the date in
* Documentation/userspace-api/landlock.rst should be updated to reflect the
* UAPI change.
+ * If the change involves a fix that requires userspace awareness, also update
+ * the errata documentation in Documentation/userspace-api/landlock.rst .
*/
-const int landlock_abi_version = 7;
+const int landlock_abi_version = 8;
/**
* sys_landlock_create_ruleset - Create a new ruleset
@@ -454,9 +457,10 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
* - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF
* - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON
* - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF
+ * - %LANDLOCK_RESTRICT_SELF_TSYNC
*
- * This system call enables to enforce a Landlock ruleset on the current
- * thread. Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its
+ * This system call enforces a Landlock ruleset on the current thread.
+ * Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its
* namespace or is running with no_new_privs. This avoids scenarios where
* unprivileged tasks can affect the behavior of privileged children.
*
@@ -478,8 +482,7 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
flags)
{
- struct landlock_ruleset *new_dom,
- *ruleset __free(landlock_put_ruleset) = NULL;
+ struct landlock_ruleset *ruleset __free(landlock_put_ruleset) = NULL;
struct cred *new_cred;
struct landlock_cred_security *new_llcred;
bool __maybe_unused log_same_exec, log_new_exec, log_subdomains,
@@ -538,33 +541,43 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
* We could optimize this case by not calling commit_creds() if this flag
* was already set, but it is not worth the complexity.
*/
- if (!ruleset)
- return commit_creds(new_cred);
-
- /*
- * There is no possible race condition while copying and manipulating
- * the current credentials because they are dedicated per thread.
- */
- new_dom = landlock_merge_ruleset(new_llcred->domain, ruleset);
- if (IS_ERR(new_dom)) {
- abort_creds(new_cred);
- return PTR_ERR(new_dom);
- }
+ if (ruleset) {
+ /*
+ * There is no possible race condition while copying and
+ * manipulating the current credentials because they are
+ * dedicated per thread.
+ */
+ struct landlock_ruleset *const new_dom =
+ landlock_merge_ruleset(new_llcred->domain, ruleset);
+ if (IS_ERR(new_dom)) {
+ abort_creds(new_cred);
+ return PTR_ERR(new_dom);
+ }
#ifdef CONFIG_AUDIT
- new_dom->hierarchy->log_same_exec = log_same_exec;
- new_dom->hierarchy->log_new_exec = log_new_exec;
- if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains)
- new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED;
+ new_dom->hierarchy->log_same_exec = log_same_exec;
+ new_dom->hierarchy->log_new_exec = log_new_exec;
+ if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains)
+ new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED;
#endif /* CONFIG_AUDIT */
- /* Replaces the old (prepared) domain. */
- landlock_put_ruleset(new_llcred->domain);
- new_llcred->domain = new_dom;
+ /* Replaces the old (prepared) domain. */
+ landlock_put_ruleset(new_llcred->domain);
+ new_llcred->domain = new_dom;
#ifdef CONFIG_AUDIT
- new_llcred->domain_exec |= BIT(new_dom->num_layers - 1);
+ new_llcred->domain_exec |= BIT(new_dom->num_layers - 1);
#endif /* CONFIG_AUDIT */
+ }
+
+ if (flags & LANDLOCK_RESTRICT_SELF_TSYNC) {
+ const int err = landlock_restrict_sibling_threads(
+ current_cred(), new_cred);
+ if (err) {
+ abort_creds(new_cred);
+ return err;
+ }
+ }
return commit_creds(new_cred);
}
diff --git a/security/landlock/tsync.c b/security/landlock/tsync.c
new file mode 100644
index 000000000000..0d2b9c646030
--- /dev/null
+++ b/security/landlock/tsync.c
@@ -0,0 +1,561 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock - Cross-thread ruleset enforcement
+ *
+ * Copyright © 2025 Google LLC
+ */
+
+#include <linux/atomic.h>
+#include <linux/cleanup.h>
+#include <linux/completion.h>
+#include <linux/cred.h>
+#include <linux/errno.h>
+#include <linux/overflow.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/slab.h>
+#include <linux/task_work.h>
+
+#include "cred.h"
+#include "tsync.h"
+
+/*
+ * Shared state between multiple threads which are enforcing Landlock rulesets
+ * in lockstep with each other.
+ */
+struct tsync_shared_context {
+ /* The old and tentative new creds of the calling thread. */
+ const struct cred *old_cred;
+ const struct cred *new_cred;
+
+ /* True if sibling tasks need to set the no_new_privs flag. */
+ bool set_no_new_privs;
+
+ /* An error encountered in preparation step, or 0. */
+ atomic_t preparation_error;
+
+ /*
+ * Barrier after preparation step in restrict_one_thread.
+ * The calling thread waits for completion.
+ *
+ * Re-initialized on every round of looking for newly spawned threads.
+ */
+ atomic_t num_preparing;
+ struct completion all_prepared;
+
+ /* Sibling threads wait for completion. */
+ struct completion ready_to_commit;
+
+ /*
+ * Barrier after commit step (used by syscall impl to wait for
+ * completion).
+ */
+ atomic_t num_unfinished;
+ struct completion all_finished;
+};
+
+struct tsync_work {
+ struct callback_head work;
+ struct task_struct *task;
+ struct tsync_shared_context *shared_ctx;
+};
+
+/*
+ * restrict_one_thread - update a thread's Landlock domain in lockstep with the
+ * other threads in the same process
+ *
+ * When this is run, the same function gets run in all other threads in the same
+ * process (except for the calling thread which called landlock_restrict_self).
+ * The concurrently running invocations of restrict_one_thread coordinate
+ * through the shared ctx object to do their work in lockstep to implement
+ * all-or-nothing semantics for enforcing the new Landlock domain.
+ *
+ * Afterwards, depending on the presence of an error, all threads either commit
+ * or abort the prepared credentials. The commit operation cannot fail any
+ * more.
+ */
+static void restrict_one_thread(struct tsync_shared_context *ctx)
+{
+ int err;
+ struct cred *cred = NULL;
+
+ if (current_cred() == ctx->old_cred) {
+ /*
+ * Switch out old_cred with new_cred, if possible.
+ *
+ * In the common case, where all threads initially point to the same
+ * struct cred, this optimization avoids creating separate redundant
+ * credentials objects for each, which would all have the same contents.
+ *
+ * Note: We are intentionally dropping the const qualifier here, because
+ * it is required by commit_creds() and abort_creds().
+ */
+ cred = (struct cred *)get_cred(ctx->new_cred);
+ } else {
+ /* Else, prepare new creds and populate them. */
+ cred = prepare_creds();
+
+ if (!cred) {
+ atomic_set(&ctx->preparation_error, -ENOMEM);
+
+ /*
+ * Even on error, we need to adhere to the protocol and coordinate
+ * with concurrently running invocations.
+ */
+ if (atomic_dec_return(&ctx->num_preparing) == 0)
+ complete_all(&ctx->all_prepared);
+
+ goto out;
+ }
+
+ landlock_cred_copy(landlock_cred(cred),
+ landlock_cred(ctx->new_cred));
+ }
+
+ /*
+ * Barrier: Wait until all threads are done preparing.
+ * After this point, we can have no more failures.
+ */
+ if (atomic_dec_return(&ctx->num_preparing) == 0)
+ complete_all(&ctx->all_prepared);
+
+ /*
+ * Wait for signal from calling thread that it's safe to read the
+ * preparation error now and we are ready to commit (or abort).
+ */
+ wait_for_completion(&ctx->ready_to_commit);
+
+ /* Abort the commit if any of the other threads had an error. */
+ err = atomic_read(&ctx->preparation_error);
+ if (err) {
+ abort_creds(cred);
+ goto out;
+ }
+
+ /*
+ * Make sure that all sibling tasks fulfill the no_new_privs prerequisite.
+ * (This is in line with Seccomp's SECCOMP_FILTER_FLAG_TSYNC logic in
+ * kernel/seccomp.c)
+ */
+ if (ctx->set_no_new_privs)
+ task_set_no_new_privs(current);
+
+ commit_creds(cred);
+
+out:
+ /* Notify the calling thread once all threads are done */
+ if (atomic_dec_return(&ctx->num_unfinished) == 0)
+ complete_all(&ctx->all_finished);
+}
+
+/*
+ * restrict_one_thread_callback - task_work callback for restricting a thread
+ *
+ * Calls restrict_one_thread with the shared struct tsync_shared_context.
+ */
+static void restrict_one_thread_callback(struct callback_head *work)
+{
+ struct tsync_work *ctx = container_of(work, struct tsync_work, work);
+
+ restrict_one_thread(ctx->shared_ctx);
+}
+
+/*
+ * struct tsync_works - a growable array of per-task contexts
+ *
+ * The zero-initialized struct represents the empty array.
+ */
+struct tsync_works {
+ struct tsync_work **works;
+ size_t size;
+ size_t capacity;
+};
+
+/*
+ * tsync_works_provide - provides a preallocated tsync_work for the given task
+ *
+ * This also stores a task pointer in the context and increments the reference
+ * count of the task.
+ *
+ * This function may fail in the case where we did not preallocate sufficient
+ * capacity. This can legitimately happen if new threads get started after we
+ * grew the capacity.
+ *
+ * Returns:
+ * A pointer to the preallocated context struct, with task filled in.
+ *
+ * NULL, if we ran out of preallocated context structs.
+ */
+static struct tsync_work *tsync_works_provide(struct tsync_works *s,
+ struct task_struct *task)
+{
+ struct tsync_work *ctx;
+
+ if (s->size >= s->capacity)
+ return NULL;
+
+ ctx = s->works[s->size];
+ s->size++;
+
+ ctx->task = get_task_struct(task);
+ return ctx;
+}
+
+/*
+ * tsync_works_grow_by - preallocates space for n more contexts in s
+ *
+ * On a successful return, the subsequent n calls to tsync_works_provide() are
+ * guaranteed to succeed. (size + n <= capacity)
+ *
+ * Returns:
+ * -EOVERFLOW if the new capacity calculation would overflow
+ *
+ * -ENOMEM if the (re)allocation fails or only partially succeeds
+ *
+ * 0 if the allocation succeeds or no reallocation was needed
+ */
+static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags)
+{
+ size_t i;
+ size_t new_capacity;
+ struct tsync_work **works;
+ struct tsync_work *work;
+
+ if (check_add_overflow(s->size, n, &new_capacity))
+ return -EOVERFLOW;
+
+ /* No need to reallocate if s already has sufficient capacity. */
+ if (new_capacity <= s->capacity)
+ return 0;
+
+ works = krealloc_array(s->works, new_capacity, sizeof(s->works[0]),
+ flags);
+ if (!works)
+ return -ENOMEM;
+
+ s->works = works;
+
+ for (i = s->capacity; i < new_capacity; i++) {
+ work = kzalloc(sizeof(*work), flags);
+ if (!work) {
+ /*
+ * Leave the object in a consistent state,
+ * but return an error.
+ */
+ s->capacity = i;
+ return -ENOMEM;
+ }
+ s->works[i] = work;
+ }
+ s->capacity = new_capacity;
+ return 0;
+}
+
+/*
+ * tsync_works_contains_task - checks for presence of task in s
+ */
+static bool tsync_works_contains_task(const struct tsync_works *s,
+ struct task_struct *task)
+{
+ size_t i;
+
+ for (i = 0; i < s->size; i++)
+ if (s->works[i]->task == task)
+ return true;
+ return false;
+}
+
+/*
+ * tsync_works_release - frees memory held by s and drops all task references
+ *
+ * This does not free s itself, only the data structures held by it.
+ */
+static void tsync_works_release(struct tsync_works *s)
+{
+ size_t i;
+
+ for (i = 0; i < s->size; i++) {
+ if (!s->works[i]->task)
+ continue;
+
+ put_task_struct(s->works[i]->task);
+ }
+
+ for (i = 0; i < s->capacity; i++)
+ kfree(s->works[i]);
+ kfree(s->works);
+ s->works = NULL;
+ s->size = 0;
+ s->capacity = 0;
+}
+
+/*
+ * count_additional_threads - counts the sibling threads that are not in works
+ */
+static size_t count_additional_threads(const struct tsync_works *works)
+{
+ struct task_struct *thread, *caller;
+ size_t n = 0;
+
+ caller = current;
+
+ guard(rcu)();
+
+ for_each_thread(caller, thread) {
+ /* Skip current, since it is initiating the sync. */
+ if (thread == caller)
+ continue;
+
+ /* Skip exited threads. */
+ if (thread->flags & PF_EXITING)
+ continue;
+
+ /* Skip threads that we have already seen. */
+ if (tsync_works_contains_task(works, thread))
+ continue;
+
+ n++;
+ }
+ return n;
+}
+
+/*
+ * schedule_task_work - adds task_work for all eligible sibling threads
+ * which have not been scheduled yet
+ *
+ * For each added task_work, atomically increments shared_ctx->num_preparing and
+ * shared_ctx->num_unfinished.
+ *
+ * Returns:
+ * true, if at least one eligible sibling thread was found
+ */
+static bool schedule_task_work(struct tsync_works *works,
+ struct tsync_shared_context *shared_ctx)
+{
+ int err;
+ struct task_struct *thread, *caller;
+ struct tsync_work *ctx;
+ bool found_more_threads = false;
+
+ caller = current;
+
+ guard(rcu)();
+
+ for_each_thread(caller, thread) {
+ /* Skip current, since it is initiating the sync. */
+ if (thread == caller)
+ continue;
+
+ /* Skip exited threads. */
+ if (thread->flags & PF_EXITING)
+ continue;
+
+ /* Skip threads that we already looked at. */
+ if (tsync_works_contains_task(works, thread))
+ continue;
+
+ /*
+ * We found a sibling thread that is not doing its task_work yet, and
+ * which might spawn new threads before our task work runs, so we need
+ * at least one more round in the outer loop.
+ */
+ found_more_threads = true;
+
+ ctx = tsync_works_provide(works, thread);
+ if (!ctx) {
+ /*
+ * We ran out of preallocated contexts -- we need to try again with
+ * this thread at a later time!
+ * found_more_threads is already true at this point.
+ */
+ break;
+ }
+
+ ctx->shared_ctx = shared_ctx;
+
+ atomic_inc(&shared_ctx->num_preparing);
+ atomic_inc(&shared_ctx->num_unfinished);
+
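+ /*
+ * Queue the work as a pseudo-signal (TWA_SIGNAL) so that the target
+ * thread runs it promptly, even if it is blocked in an interruptible
+ * system call.
+ */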
+ init_task_work(&ctx->work, restrict_one_thread_callback);
+ err = task_work_add(thread, &ctx->work, TWA_SIGNAL);
+ if (err) {
+ /*
+ * task_work_add() only fails if the task is about to exit. We
+ * checked that earlier, but it can happen as a race. Resume
+ * without setting an error, as the task is probably gone in the
+ * next loop iteration. For consistency, remove the task from ctx
+ * so that it does not look like we handed it a task_work.
+ */
+ put_task_struct(ctx->task);
+ ctx->task = NULL;
+
+ atomic_dec(&shared_ctx->num_preparing);
+ atomic_dec(&shared_ctx->num_unfinished);
+ }
+ }
+
+ return found_more_threads;
+}
+
+/*
+ * cancel_tsync_works - cancels all task works that can still be canceled
+ *
+ * Task works can be canceled as long as they are still queued and have not
+ * started running. If they get canceled, we decrement
+ * shared_ctx->num_preparing and shared_ctx->num_unfinished and mark the two
+ * completions if needed, as if the task had never been scheduled.
+ */
+static void cancel_tsync_works(struct tsync_works *works,
+ struct tsync_shared_context *shared_ctx)
+{
+ size_t i;
+
+ for (i = 0; i < works->size; i++) {
+ if (!task_work_cancel(works->works[i]->task,
+ &works->works[i]->work))
+ continue;
+
+ /* After dequeueing, act as if the task work had executed. */
+
+ if (atomic_dec_return(&shared_ctx->num_preparing) == 0)
+ complete_all(&shared_ctx->all_prepared);
+
+ if (atomic_dec_return(&shared_ctx->num_unfinished) == 0)
+ complete_all(&shared_ctx->all_finished);
+ }
+}
+
+/*
+ * landlock_restrict_sibling_threads - enables a Landlock policy for all sibling threads
+ */
+int landlock_restrict_sibling_threads(const struct cred *old_cred,
+ const struct cred *new_cred)
+{
+ int err;
+ struct tsync_shared_context shared_ctx;
+ struct tsync_works works = {};
+ size_t newly_discovered_threads;
+ bool found_more_threads;
+
+ atomic_set(&shared_ctx.preparation_error, 0);
+ init_completion(&shared_ctx.all_prepared);
+ init_completion(&shared_ctx.ready_to_commit);
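+ /*
+ * num_unfinished starts at 1 to account for the calling thread itself;
+ * the matching decrement happens after "ready_to_commit" is signaled.
+ */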
+ atomic_set(&shared_ctx.num_unfinished, 1);
+ init_completion(&shared_ctx.all_finished);
+ shared_ctx.old_cred = old_cred;
+ shared_ctx.new_cred = new_cred;
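+ /*
+ * If the caller already runs with no_new_privs, propagate it to all
+ * sibling threads as well, mirroring seccomp's TSYNC behavior.
+ */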
+ shared_ctx.set_no_new_privs = task_no_new_privs(current);
+
+ /*
+ * We schedule a pseudo-signal task_work for each of the calling task's
+ * sibling threads. In the task work, each thread:
+ *
+ * 1) runs prepare_creds() and writes back the error to
+ * shared_ctx.preparation_error, if needed.
+ *
+ * 2) signals that it's done with prepare_creds() to the calling task.
+ * (completion "all_prepared").
+ *
+ * 3) waits for the completion "ready_to_commit". This is sent by the
+ * calling task after ensuring that all sibling threads are done
+ * with the "preparation" stage.
+ *
+ * After this barrier is reached, it's safe to read
+ * shared_ctx.preparation_error.
+ *
+ * 4) reads shared_ctx.preparation_error and then either does commit_creds()
+ * or abort_creds().
+ *
+ * 5) signals that it's done altogether (barrier synchronization
+ * "all_finished")
+ *
+ * Unlike seccomp, which modifies sibling tasks directly, we do not need to
+ * acquire the cred_guard_mutex and sighand->siglock:
+ *
+ * - Since, in our case, each thread exchanges its own struct cred through
+ * the credentials API, no locks are needed for that.
+ * - Our for_each_thread() loops are protected by RCU.
+ * - We do not acquire a lock to keep the list of sibling threads stable
+ * between our for_each_thread loops. If the list of available sibling
+ * threads changes between these for_each_thread loops, we make up for
+ * that by continuing to look for threads until they are all discovered
+ * and have entered their task_work, where they are unable to spawn new
+ * threads.
+ */
+ do {
+ /* In RCU read-lock, count the threads we need. */
+ newly_discovered_threads = count_additional_threads(&works);
+
+ if (newly_discovered_threads == 0)
+ break; /* done */
+
+ err = tsync_works_grow_by(&works, newly_discovered_threads,
+ GFP_KERNEL_ACCOUNT);
+ if (err) {
+ atomic_set(&shared_ctx.preparation_error, err);
+ break;
+ }
+
+ /*
+ * The "all_prepared" barrier is used locally to the loop body, this use
+ * of for_each_thread(). We can reset it on each loop iteration because
+ * all previous loop iterations are done with it already.
+ *
+ * num_preparing is initialized to 1 so that the counter can not go to 0
+ * and mark the completion as done before all task works are registered.
+ * We decrement it at the end of the loop body.
+ */
+ atomic_set(&shared_ctx.num_preparing, 1);
+ reinit_completion(&shared_ctx.all_prepared);
+
+ /*
+ * In RCU read-lock, schedule task work on newly discovered sibling
+ * tasks.
+ */
+ found_more_threads = schedule_task_work(&works, &shared_ctx);
+
+ /*
+ * Decrement num_preparing on behalf of the current thread, undoing the
+ * initialization to 1 a few lines above.
+ */
+ if (atomic_dec_return(&shared_ctx.num_preparing) > 0) {
+ if (wait_for_completion_interruptible(
+ &shared_ctx.all_prepared)) {
+ /* In case of interruption, we need to retry the system call. */
+ atomic_set(&shared_ctx.preparation_error,
+ -ERESTARTNOINTR);
+
+ /*
+ * Cancel task works for tasks that did not start running yet,
+ * and decrement num_preparing and num_unfinished accordingly.
+ */
+ cancel_tsync_works(&works, &shared_ctx);
+
+ /*
+ * The remaining task works have already started running, so the
+ * wait for "all_prepared" below is guaranteed to finish.
+ */
+ wait_for_completion(&shared_ctx.all_prepared);
+ }
+ }
+ } while (found_more_threads &&
+ !atomic_read(&shared_ctx.preparation_error));
+
+ /*
+ * All remaining sibling threads are now blocked in their task work, in the
+ * "prepared" state. Ask them to commit (or abort, if an error was recorded).
+ */
+ complete_all(&shared_ctx.ready_to_commit);
+
+ /*
+ * Decrement num_unfinished on behalf of the current thread, undoing the
+ * initialization to 1 at the beginning.
+ */
+ if (atomic_dec_return(&shared_ctx.num_unfinished) > 0)
+ wait_for_completion(&shared_ctx.all_finished);
+
+ tsync_works_release(&works);
+
+ return atomic_read(&shared_ctx.preparation_error);
+}
diff --git a/security/landlock/tsync.h b/security/landlock/tsync.h
new file mode 100644
index 000000000000..ef86bb61c2f6
--- /dev/null
+++ b/security/landlock/tsync.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock - Cross-thread ruleset enforcement
+ *
+ * Copyright © 2025 Google LLC
+ */
+
+#ifndef _SECURITY_LANDLOCK_TSYNC_H
+#define _SECURITY_LANDLOCK_TSYNC_H
+
+#include <linux/cred.h>
+
+int landlock_restrict_sibling_threads(const struct cred *old_cred,
+ const struct cred *new_cred);
+
+#endif /* _SECURITY_LANDLOCK_TSYNC_H */
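
As a rough usage sketch (not part of this patch): a multithreaded process could restrict all of its threads at once along the following lines. This assumes the updated <linux/landlock.h> UAPI header exposes a LANDLOCK_RESTRICT_SELF_TSYNC flag for landlock_restrict_self(), which is what reaches landlock_restrict_sibling_threads() above; the landlock syscall numbers, struct landlock_ruleset_attr, and PR_SET_NO_NEW_PRIVS are the existing UAPI.

#define _GNU_SOURCE
#include <linux/landlock.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Sketch: restrict the whole process, not only the calling thread. */
static int restrict_all_threads(void)
{
	const struct landlock_ruleset_attr attr = {
		/* Deny filesystem writes everywhere (no rules are added). */
		.handled_access_fs = LANDLOCK_ACCESS_FS_WRITE_FILE,
	};
	int ruleset_fd, err;

	ruleset_fd = syscall(SYS_landlock_create_ruleset, &attr, sizeof(attr), 0);
	if (ruleset_fd < 0)
		return -1;

	/* Prerequisite; the kernel propagates it to sibling threads. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		close(ruleset_fd);
		return -1;
	}

	/*
	 * Assumed flag name: with it, landlock_restrict_self() applies the
	 * new domain to every sibling thread via the task_work machinery
	 * added in tsync.c, instead of only to the calling thread.
	 */
	err = syscall(SYS_landlock_restrict_self, ruleset_fd,
		      LANDLOCK_RESTRICT_SELF_TSYNC);
	close(ruleset_fd);
	return err;
}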