| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-11 15:57:08 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-11 15:57:08 -0800 |
| commit | c22e26bd0906e9c8325462993f01adb16b8ea2c0 (patch) | |
| tree | 19085170f70859c43b5ee7fe742c35d25fcadd57 /security | |
| parent | d0e91e401e31959154b6518c29d130b1973e3785 (diff) | |
| parent | e265b330b93e3a3f9ff5256451d4f09b5f89b239 (diff) | |
Merge tag 'landlock-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux
Pull landlock updates from Mickaël Salaün:
- extend Landlock to enforce restrictions on a whole process, similarly
  to seccomp's TSYNC flag (see the usage sketch after this list)
- refactor data structures to simplify code and improve performance
- add documentation to cover missing parts
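To illustrate the new flag, here is a minimal, hedged userspace sketch (not part of the pull request) of enforcing a ruleset on every thread of the calling process at once. It assumes kernel and libc headers that expose LANDLOCK_RESTRICT_SELF_TSYNC and the landlock syscall numbers; the handled access rights and the helper name restrict_all_threads() are illustrative only.

```c
/* Sketch only: apply one Landlock domain to all threads of this process. */
#include <linux/landlock.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int restrict_all_threads(void)
{
	const struct landlock_ruleset_attr attr = {
		/* Illustrative choice of handled filesystem access rights. */
		.handled_access_fs = LANDLOCK_ACCESS_FS_WRITE_FILE |
				     LANDLOCK_ACCESS_FS_MAKE_REG,
	};
	const int ruleset_fd =
		syscall(SYS_landlock_create_ruleset, &attr, sizeof(attr), 0);

	if (ruleset_fd < 0)
		return -1;

	/* Required unless the task holds CAP_SYS_ADMIN in its user namespace. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		goto err;

	/*
	 * With LANDLOCK_RESTRICT_SELF_TSYNC the new domain is applied to the
	 * calling thread and all of its sibling threads, with all-or-nothing
	 * semantics (similar to seccomp's SECCOMP_FILTER_FLAG_TSYNC).
	 */
	if (syscall(SYS_landlock_restrict_self, ruleset_fd,
		    LANDLOCK_RESTRICT_SELF_TSYNC))
		goto err;

	close(ruleset_fd);
	return 0;

err:
	close(ruleset_fd);
	return -1;
}
```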
* tag 'landlock-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux:
mailmap: Add entry for Mickaël Salaün
landlock: Transpose the layer masks data structure
landlock: Add access_mask_subset() helper
selftests/landlock: Add filesystem access benchmark
landlock: Document audit blocker field format
landlock: Add errata documentation section
landlock: Add backwards compatibility for restrict flags
landlock: Refactor TCP socket type check
landlock: Minor reword of docs for TCP access rights
landlock: Document LANDLOCK_RESTRICT_SELF_TSYNC
selftests/landlock: Add LANDLOCK_RESTRICT_SELF_TSYNC tests
landlock: Multithreading support for landlock_restrict_self()
Diffstat (limited to 'security')
| -rw-r--r-- | security/landlock/Makefile | 11 |
| -rw-r--r-- | security/landlock/access.h | 35 |
| -rw-r--r-- | security/landlock/audit.c | 81 |
| -rw-r--r-- | security/landlock/audit.h | 3 |
| -rw-r--r-- | security/landlock/cred.h | 12 |
| -rw-r--r-- | security/landlock/domain.c | 44 |
| -rw-r--r-- | security/landlock/domain.h | 3 |
| -rw-r--r-- | security/landlock/errata/abi-1.h | 8 |
| -rw-r--r-- | security/landlock/errata/abi-4.h | 7 |
| -rw-r--r-- | security/landlock/errata/abi-6.h | 10 |
| -rw-r--r-- | security/landlock/fs.c | 352 |
| -rw-r--r-- | security/landlock/limits.h | 2 |
| -rw-r--r-- | security/landlock/net.c | 30 |
| -rw-r--r-- | security/landlock/ruleset.c | 91 |
| -rw-r--r-- | security/landlock/ruleset.h | 6 |
| -rw-r--r-- | security/landlock/syscalls.c | 65 |
| -rw-r--r-- | security/landlock/tsync.c | 561 |
| -rw-r--r-- | security/landlock/tsync.h | 16 |
18 files changed, 960 insertions, 377 deletions
diff --git a/security/landlock/Makefile b/security/landlock/Makefile index 3160c2bdac1d..ffa7646d99f3 100644 --- a/security/landlock/Makefile +++ b/security/landlock/Makefile @@ -1,7 +1,14 @@ obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o -landlock-y := setup.o syscalls.o object.o ruleset.o \ - cred.o task.o fs.o +landlock-y := \ + setup.o \ + syscalls.o \ + object.o \ + ruleset.o \ + cred.o \ + task.o \ + fs.o \ + tsync.o landlock-$(CONFIG_INET) += net.o diff --git a/security/landlock/access.h b/security/landlock/access.h index 7961c6630a2d..42c95747d7bd 100644 --- a/security/landlock/access.h +++ b/security/landlock/access.h @@ -61,14 +61,30 @@ union access_masks_all { static_assert(sizeof(typeof_member(union access_masks_all, masks)) == sizeof(typeof_member(union access_masks_all, all))); -typedef u16 layer_mask_t; - -/* Makes sure all layers can be checked. */ -static_assert(BITS_PER_TYPE(layer_mask_t) >= LANDLOCK_MAX_NUM_LAYERS); +/** + * struct layer_access_masks - A boolean matrix of layers and access rights + * + * This has a bit for each combination of layer numbers and access rights. + * During access checks, it is used to represent the access rights for each + * layer which still need to be fulfilled. When all bits are 0, the access + * request is considered to be fulfilled. + */ +struct layer_access_masks { + /** + * @access: The unfulfilled access rights for each layer. + */ + access_mask_t access[LANDLOCK_MAX_NUM_LAYERS]; +}; /* - * Tracks domains responsible of a denied access. This is required to avoid - * storing in each object the full layer_masks[] required by update_request(). + * Tracks domains responsible of a denied access. This avoids storing in each + * object the full matrix of per-layer unfulfilled access rights, which is + * required by update_request(). + * + * Each nibble represents the layer index of the newest layer which denied a + * certain access right. For file system access rights, the upper four bits are + * the index of the layer which denies LANDLOCK_ACCESS_FS_IOCTL_DEV and the + * lower nibble represents LANDLOCK_ACCESS_FS_TRUNCATE. */ typedef u8 deny_masks_t; @@ -97,4 +113,11 @@ landlock_upgrade_handled_access_masks(struct access_masks access_masks) return access_masks; } +/* Checks the subset relation between access masks. */ +static inline bool access_mask_subset(access_mask_t subset, + access_mask_t superset) +{ + return (subset | superset) == superset; +} + #endif /* _SECURITY_LANDLOCK_ACCESS_H */ diff --git a/security/landlock/audit.c b/security/landlock/audit.c index e899995f1fd5..60ff217ab95b 100644 --- a/security/landlock/audit.c +++ b/security/landlock/audit.c @@ -180,38 +180,21 @@ static void test_get_hierarchy(struct kunit *const test) #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ +/* Get the youngest layer that denied the access_request. 
*/ static size_t get_denied_layer(const struct landlock_ruleset *const domain, access_mask_t *const access_request, - const layer_mask_t (*const layer_masks)[], - const size_t layer_masks_size) + const struct layer_access_masks *masks) { - const unsigned long access_req = *access_request; - unsigned long access_bit; - access_mask_t missing = 0; - long youngest_layer = -1; - - for_each_set_bit(access_bit, &access_req, layer_masks_size) { - const layer_mask_t mask = (*layer_masks)[access_bit]; - long layer; - - if (!mask) - continue; - - /* __fls(1) == 0 */ - layer = __fls(mask); - if (layer > youngest_layer) { - youngest_layer = layer; - missing = BIT(access_bit); - } else if (layer == youngest_layer) { - missing |= BIT(access_bit); + for (ssize_t i = ARRAY_SIZE(masks->access) - 1; i >= 0; i--) { + if (masks->access[i] & *access_request) { + *access_request &= masks->access[i]; + return i; } } - *access_request = missing; - if (youngest_layer == -1) - return domain->num_layers - 1; - - return youngest_layer; + /* Not found - fall back to default values */ + *access_request = 0; + return domain->num_layers - 1; } #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST @@ -221,50 +204,39 @@ static void test_get_denied_layer(struct kunit *const test) const struct landlock_ruleset dom = { .num_layers = 5, }; - const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT(0), - [BIT_INDEX(LANDLOCK_ACCESS_FS_READ_FILE)] = BIT(1), - [BIT_INDEX(LANDLOCK_ACCESS_FS_READ_DIR)] = BIT(1) | BIT(0), - [BIT_INDEX(LANDLOCK_ACCESS_FS_REMOVE_DIR)] = BIT(2), + const struct layer_access_masks masks = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE | + LANDLOCK_ACCESS_FS_READ_DIR, + .access[1] = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_READ_DIR, + .access[2] = LANDLOCK_ACCESS_FS_REMOVE_DIR, }; access_mask_t access; access = LANDLOCK_ACCESS_FS_EXECUTE; - KUNIT_EXPECT_EQ(test, 0, - get_denied_layer(&dom, &access, &layer_masks, - sizeof(layer_masks))); + KUNIT_EXPECT_EQ(test, 0, get_denied_layer(&dom, &access, &masks)); KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_EXECUTE); access = LANDLOCK_ACCESS_FS_READ_FILE; - KUNIT_EXPECT_EQ(test, 1, - get_denied_layer(&dom, &access, &layer_masks, - sizeof(layer_masks))); + KUNIT_EXPECT_EQ(test, 1, get_denied_layer(&dom, &access, &masks)); KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_READ_FILE); access = LANDLOCK_ACCESS_FS_READ_DIR; - KUNIT_EXPECT_EQ(test, 1, - get_denied_layer(&dom, &access, &layer_masks, - sizeof(layer_masks))); + KUNIT_EXPECT_EQ(test, 1, get_denied_layer(&dom, &access, &masks)); KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_READ_DIR); access = LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_READ_DIR; - KUNIT_EXPECT_EQ(test, 1, - get_denied_layer(&dom, &access, &layer_masks, - sizeof(layer_masks))); + KUNIT_EXPECT_EQ(test, 1, get_denied_layer(&dom, &access, &masks)); KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_READ_DIR); access = LANDLOCK_ACCESS_FS_EXECUTE | LANDLOCK_ACCESS_FS_READ_DIR; - KUNIT_EXPECT_EQ(test, 1, - get_denied_layer(&dom, &access, &layer_masks, - sizeof(layer_masks))); + KUNIT_EXPECT_EQ(test, 1, get_denied_layer(&dom, &access, &masks)); KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_READ_DIR); access = LANDLOCK_ACCESS_FS_WRITE_FILE; - KUNIT_EXPECT_EQ(test, 4, - get_denied_layer(&dom, &access, &layer_masks, - sizeof(layer_masks))); + KUNIT_EXPECT_EQ(test, 4, get_denied_layer(&dom, &access, &masks)); KUNIT_EXPECT_EQ(test, access, 0); } @@ -370,9 +342,6 @@ 
static bool is_valid_request(const struct landlock_request *const request) return false; } - if (WARN_ON_ONCE(!!request->layer_masks ^ !!request->layer_masks_size)) - return false; - if (request->deny_masks) { if (WARN_ON_ONCE(!request->all_existing_optional_access)) return false; @@ -406,12 +375,12 @@ void landlock_log_denial(const struct landlock_cred_security *const subject, if (missing) { /* Gets the nearest domain that denies the request. */ if (request->layer_masks) { - youngest_layer = get_denied_layer( - subject->domain, &missing, request->layer_masks, - request->layer_masks_size); + youngest_layer = get_denied_layer(subject->domain, + &missing, + request->layer_masks); } else { youngest_layer = get_layer_from_deny_masks( - &missing, request->all_existing_optional_access, + &missing, _LANDLOCK_ACCESS_FS_OPTIONAL, request->deny_masks); } youngest_denied = diff --git a/security/landlock/audit.h b/security/landlock/audit.h index 92428b7fc4d8..56778331b58c 100644 --- a/security/landlock/audit.h +++ b/security/landlock/audit.h @@ -43,8 +43,7 @@ struct landlock_request { access_mask_t access; /* Required fields for requests with layer masks. */ - const layer_mask_t (*layer_masks)[]; - size_t layer_masks_size; + const struct layer_access_masks *layer_masks; /* Required fields for requests with deny masks. */ const access_mask_t all_existing_optional_access; diff --git a/security/landlock/cred.h b/security/landlock/cred.h index c82fe63ec598..c10a06727eb1 100644 --- a/security/landlock/cred.h +++ b/security/landlock/cred.h @@ -26,6 +26,8 @@ * This structure is packed to minimize the size of struct * landlock_file_security. However, it is always aligned in the LSM cred blob, * see lsm_set_blob_size(). + * + * When updating this, also update landlock_cred_copy() if needed. */ struct landlock_cred_security { /** @@ -65,6 +67,16 @@ landlock_cred(const struct cred *cred) return cred->security + landlock_blob_sizes.lbs_cred; } +static inline void landlock_cred_copy(struct landlock_cred_security *dst, + const struct landlock_cred_security *src) +{ + landlock_put_ruleset(dst->domain); + + *dst = *src; + + landlock_get_ruleset(src->domain); +} + static inline struct landlock_ruleset *landlock_get_current_domain(void) { return landlock_cred(current_cred())->domain; diff --git a/security/landlock/domain.c b/security/landlock/domain.c index a647b68e8d06..79cb3bbdf4c5 100644 --- a/security/landlock/domain.c +++ b/security/landlock/domain.c @@ -182,32 +182,36 @@ static void test_get_layer_deny_mask(struct kunit *const test) deny_masks_t landlock_get_deny_masks(const access_mask_t all_existing_optional_access, const access_mask_t optional_access, - const layer_mask_t (*const layer_masks)[], - const size_t layer_masks_size) + const struct layer_access_masks *const masks) { const unsigned long access_opt = optional_access; unsigned long access_bit; deny_masks_t deny_masks = 0; + access_mask_t all_denied = 0; /* This may require change with new object types. 
*/ - WARN_ON_ONCE(access_opt != - (optional_access & all_existing_optional_access)); + WARN_ON_ONCE(!access_mask_subset(optional_access, + all_existing_optional_access)); - if (WARN_ON_ONCE(!layer_masks)) + if (WARN_ON_ONCE(!masks)) return 0; if (WARN_ON_ONCE(!access_opt)) return 0; - for_each_set_bit(access_bit, &access_opt, layer_masks_size) { - const layer_mask_t mask = (*layer_masks)[access_bit]; + for (ssize_t i = ARRAY_SIZE(masks->access) - 1; i >= 0; i--) { + const access_mask_t denied = masks->access[i] & optional_access; + const unsigned long newly_denied = denied & ~all_denied; - if (!mask) + if (!newly_denied) continue; - /* __fls(1) == 0 */ - deny_masks |= get_layer_deny_mask(all_existing_optional_access, - access_bit, __fls(mask)); + for_each_set_bit(access_bit, &newly_denied, + 8 * sizeof(access_mask_t)) { + deny_masks |= get_layer_deny_mask( + all_existing_optional_access, access_bit, i); + } + all_denied |= denied; } return deny_masks; } @@ -216,28 +220,28 @@ landlock_get_deny_masks(const access_mask_t all_existing_optional_access, static void test_landlock_get_deny_masks(struct kunit *const test) { - const layer_mask_t layers1[BITS_PER_TYPE(access_mask_t)] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0) | - BIT_ULL(9), - [BIT_INDEX(LANDLOCK_ACCESS_FS_TRUNCATE)] = BIT_ULL(1), - [BIT_INDEX(LANDLOCK_ACCESS_FS_IOCTL_DEV)] = BIT_ULL(2) | - BIT_ULL(0), + const struct layer_access_masks layers1 = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE | + LANDLOCK_ACCESS_FS_IOCTL_DEV, + .access[1] = LANDLOCK_ACCESS_FS_TRUNCATE, + .access[2] = LANDLOCK_ACCESS_FS_IOCTL_DEV, + .access[9] = LANDLOCK_ACCESS_FS_EXECUTE, }; KUNIT_EXPECT_EQ(test, 0x1, landlock_get_deny_masks(_LANDLOCK_ACCESS_FS_OPTIONAL, LANDLOCK_ACCESS_FS_TRUNCATE, - &layers1, ARRAY_SIZE(layers1))); + &layers1)); KUNIT_EXPECT_EQ(test, 0x20, landlock_get_deny_masks(_LANDLOCK_ACCESS_FS_OPTIONAL, LANDLOCK_ACCESS_FS_IOCTL_DEV, - &layers1, ARRAY_SIZE(layers1))); + &layers1)); KUNIT_EXPECT_EQ( test, 0x21, landlock_get_deny_masks(_LANDLOCK_ACCESS_FS_OPTIONAL, LANDLOCK_ACCESS_FS_TRUNCATE | LANDLOCK_ACCESS_FS_IOCTL_DEV, - &layers1, ARRAY_SIZE(layers1))); + &layers1)); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ diff --git a/security/landlock/domain.h b/security/landlock/domain.h index 621f054c9a2b..a9d57db0120d 100644 --- a/security/landlock/domain.h +++ b/security/landlock/domain.h @@ -122,8 +122,7 @@ struct landlock_hierarchy { deny_masks_t landlock_get_deny_masks(const access_mask_t all_existing_optional_access, const access_mask_t optional_access, - const layer_mask_t (*const layer_masks)[], - size_t layer_masks_size); + const struct layer_access_masks *const masks); int landlock_init_hierarchy_log(struct landlock_hierarchy *const hierarchy); diff --git a/security/landlock/errata/abi-1.h b/security/landlock/errata/abi-1.h index e8a2bff2e5b6..3f099555f059 100644 --- a/security/landlock/errata/abi-1.h +++ b/security/landlock/errata/abi-1.h @@ -12,5 +12,13 @@ * hierarchy down to its filesystem root and those from the related mount point * hierarchy. This prevents access right widening through rename or link * actions. + * + * Impact: + * + * Without this fix, it was possible to widen access rights through rename or + * link actions involving disconnected directories, potentially bypassing + * ``LANDLOCK_ACCESS_FS_REFER`` restrictions. This could allow privilege + * escalation in complex mount scenarios where directories become disconnected + * from their original mount points. 
*/ LANDLOCK_ERRATUM(3) diff --git a/security/landlock/errata/abi-4.h b/security/landlock/errata/abi-4.h index c052ee54f89f..fe11ec7d7ddf 100644 --- a/security/landlock/errata/abi-4.h +++ b/security/landlock/errata/abi-4.h @@ -11,5 +11,12 @@ * :manpage:`bind(2)` and :manpage:`connect(2)` operations. This change ensures * that only TCP sockets are subject to TCP access rights, allowing other * protocols to operate without unnecessary restrictions. + * + * Impact: + * + * In kernels without this fix, using ``LANDLOCK_ACCESS_NET_BIND_TCP`` or + * ``LANDLOCK_ACCESS_NET_CONNECT_TCP`` would incorrectly restrict non-TCP + * stream protocols (SMC, MPTCP, SCTP), potentially breaking applications + * that rely on these protocols while using Landlock network restrictions. */ LANDLOCK_ERRATUM(1) diff --git a/security/landlock/errata/abi-6.h b/security/landlock/errata/abi-6.h index 5113a829f87e..5cb1475c7ea8 100644 --- a/security/landlock/errata/abi-6.h +++ b/security/landlock/errata/abi-6.h @@ -15,5 +15,15 @@ * interaction between threads of the same process should always be allowed. * This change ensures that any thread is allowed to send signals to any other * thread within the same process, regardless of their domain. + * + * Impact: + * + * This problem only manifests when the userspace process is itself using + * :manpage:`libpsx(3)` or an equivalent mechanism to enforce a Landlock policy + * on multiple already-running threads at once. Programs which enforce a + * Landlock policy at startup time and only then become multithreaded are not + * affected. Without this fix, signal scoping could break multi-threaded + * applications that expect threads within the same process to freely signal + * each other. */ LANDLOCK_ERRATUM(2) diff --git a/security/landlock/fs.c b/security/landlock/fs.c index 8205673c8b1c..e764470f588c 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -331,7 +331,7 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, /* Files only get access rights that make sense. */ if (!d_is_dir(path->dentry) && - (access_rights | ACCESS_FILE) != ACCESS_FILE) + !access_mask_subset(access_rights, ACCESS_FILE)) return -EINVAL; if (WARN_ON_ONCE(ruleset->num_layers != 1)) return -EINVAL; @@ -399,56 +399,54 @@ static const struct access_masks any_fs = { }; /* + * Returns true iff the child file with the given src_child access rights under + * src_parent would result in having the same or fewer access rights if it were + * moved under new_parent. + */ +static bool may_refer(const struct layer_access_masks *const src_parent, + const struct layer_access_masks *const src_child, + const struct layer_access_masks *const new_parent, + const bool child_is_dir) +{ + for (size_t i = 0; i < ARRAY_SIZE(new_parent->access); i++) { + access_mask_t child_access = src_parent->access[i] & + src_child->access[i]; + access_mask_t parent_access = new_parent->access[i]; + + if (!child_is_dir) { + child_access &= ACCESS_FILE; + parent_access &= ACCESS_FILE; + } + + if (!access_mask_subset(child_access, parent_access)) + return false; + } + return true; +} + +/* * Check that a destination file hierarchy has more restrictions than a source * file hierarchy. This is only used for link and rename actions. * - * @layer_masks_child2: Optional child masks. + * Returns: true if child1 may be moved from parent1 to parent2 without + * increasing its access rights. 
If child2 is set, an additional condition is + * that child2 may be used from parent2 to parent1 without increasing its access + * rights. */ -static bool no_more_access( - const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], - const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS], - const bool child1_is_directory, - const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], - const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS], - const bool child2_is_directory) +static bool no_more_access(const struct layer_access_masks *const parent1, + const struct layer_access_masks *const child1, + const bool child1_is_dir, + const struct layer_access_masks *const parent2, + const struct layer_access_masks *const child2, + const bool child2_is_dir) { - unsigned long access_bit; - - for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2); - access_bit++) { - /* Ignores accesses that only make sense for directories. */ - const bool is_file_access = - !!(BIT_ULL(access_bit) & ACCESS_FILE); + if (!may_refer(parent1, child1, parent2, child1_is_dir)) + return false; - if (child1_is_directory || is_file_access) { - /* - * Checks if the destination restrictions are a - * superset of the source ones (i.e. inherited access - * rights without child exceptions): - * restrictions(parent2) >= restrictions(child1) - */ - if ((((*layer_masks_parent1)[access_bit] & - (*layer_masks_child1)[access_bit]) | - (*layer_masks_parent2)[access_bit]) != - (*layer_masks_parent2)[access_bit]) - return false; - } + if (!child2) + return true; - if (!layer_masks_child2) - continue; - if (child2_is_directory || is_file_access) { - /* - * Checks inverted restrictions for RENAME_EXCHANGE: - * restrictions(parent1) >= restrictions(child2) - */ - if ((((*layer_masks_parent2)[access_bit] & - (*layer_masks_child2)[access_bit]) | - (*layer_masks_parent1)[access_bit]) != - (*layer_masks_parent1)[access_bit]) - return false; - } - } - return true; + return may_refer(parent2, child2, parent1, child2_is_dir); } #define NMA_TRUE(...) 
KUNIT_EXPECT_TRUE(test, no_more_access(__VA_ARGS__)) @@ -458,25 +456,25 @@ static bool no_more_access( static void test_no_more_access(struct kunit *const test) { - const layer_mask_t rx0[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), - [BIT_INDEX(LANDLOCK_ACCESS_FS_READ_FILE)] = BIT_ULL(0), + const struct layer_access_masks rx0 = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE | + LANDLOCK_ACCESS_FS_READ_FILE, }; - const layer_mask_t mx0[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), - [BIT_INDEX(LANDLOCK_ACCESS_FS_MAKE_REG)] = BIT_ULL(0), + const struct layer_access_masks mx0 = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE | + LANDLOCK_ACCESS_FS_MAKE_REG, }; - const layer_mask_t x0[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), + const struct layer_access_masks x0 = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE, }; - const layer_mask_t x1[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(1), + const struct layer_access_masks x1 = { + .access[1] = LANDLOCK_ACCESS_FS_EXECUTE, }; - const layer_mask_t x01[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0) | - BIT_ULL(1), + const struct layer_access_masks x01 = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE, + .access[1] = LANDLOCK_ACCESS_FS_EXECUTE, }; - const layer_mask_t allows_all[LANDLOCK_NUM_ACCESS_FS] = {}; + const struct layer_access_masks allows_all = {}; /* Checks without restriction. */ NMA_TRUE(&x0, &allows_all, false, &allows_all, NULL, false); @@ -564,31 +562,30 @@ static void test_no_more_access(struct kunit *const test) #undef NMA_TRUE #undef NMA_FALSE -static bool is_layer_masks_allowed( - layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) +static bool is_layer_masks_allowed(const struct layer_access_masks *masks) { - return !memchr_inv(layer_masks, 0, sizeof(*layer_masks)); + return !memchr_inv(&masks->access, 0, sizeof(masks->access)); } /* - * Removes @layer_masks accesses that are not requested. + * Removes @masks accesses that are not requested. * * Returns true if the request is allowed, false otherwise. */ -static bool -scope_to_request(const access_mask_t access_request, - layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) +static bool scope_to_request(const access_mask_t access_request, + struct layer_access_masks *masks) { - const unsigned long access_req = access_request; - unsigned long access_bit; + bool saw_unfulfilled_access = false; - if (WARN_ON_ONCE(!layer_masks)) + if (WARN_ON_ONCE(!masks)) return true; - for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks)) - (*layer_masks)[access_bit] = 0; - - return is_layer_masks_allowed(layer_masks); + for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) { + masks->access[i] &= access_request; + if (masks->access[i]) + saw_unfulfilled_access = true; + } + return !saw_unfulfilled_access; } #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST @@ -596,48 +593,41 @@ scope_to_request(const access_mask_t access_request, static void test_scope_to_request_with_exec_none(struct kunit *const test) { /* Allows everything. */ - layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; + struct layer_access_masks masks = {}; /* Checks and scopes with execute. 
*/ - KUNIT_EXPECT_TRUE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, - &layer_masks)); - KUNIT_EXPECT_EQ(test, 0, - layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); - KUNIT_EXPECT_EQ(test, 0, - layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); + KUNIT_EXPECT_TRUE(test, + scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, &masks)); + KUNIT_EXPECT_EQ(test, 0, masks.access[0]); } static void test_scope_to_request_with_exec_some(struct kunit *const test) { /* Denies execute and write. */ - layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), - [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1), + struct layer_access_masks masks = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE, + .access[1] = LANDLOCK_ACCESS_FS_WRITE_FILE, }; /* Checks and scopes with execute. */ KUNIT_EXPECT_FALSE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, - &layer_masks)); - KUNIT_EXPECT_EQ(test, BIT_ULL(0), - layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); - KUNIT_EXPECT_EQ(test, 0, - layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); + &masks)); + KUNIT_EXPECT_EQ(test, LANDLOCK_ACCESS_FS_EXECUTE, masks.access[0]); + KUNIT_EXPECT_EQ(test, 0, masks.access[1]); } static void test_scope_to_request_without_access(struct kunit *const test) { /* Denies execute and write. */ - layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), - [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1), + struct layer_access_masks masks = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE, + .access[1] = LANDLOCK_ACCESS_FS_WRITE_FILE, }; /* Checks and scopes without access request. */ - KUNIT_EXPECT_TRUE(test, scope_to_request(0, &layer_masks)); - KUNIT_EXPECT_EQ(test, 0, - layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); - KUNIT_EXPECT_EQ(test, 0, - layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); + KUNIT_EXPECT_TRUE(test, scope_to_request(0, &masks)); + KUNIT_EXPECT_EQ(test, 0, masks.access[0]); + KUNIT_EXPECT_EQ(test, 0, masks.access[1]); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ @@ -646,20 +636,16 @@ static void test_scope_to_request_without_access(struct kunit *const test) * Returns true if there is at least one access right different than * LANDLOCK_ACCESS_FS_REFER. */ -static bool -is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS], - const access_mask_t access_request) +static bool is_eacces(const struct layer_access_masks *masks, + const access_mask_t access_request) { - unsigned long access_bit; - /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */ - const unsigned long access_check = access_request & - ~LANDLOCK_ACCESS_FS_REFER; - - if (!layer_masks) + if (!masks) return false; - for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) { - if ((*layer_masks)[access_bit]) + for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) { + /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. 
*/ + if (masks->access[i] & access_request & + ~LANDLOCK_ACCESS_FS_REFER) return true; } return false; @@ -672,37 +658,37 @@ is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS], static void test_is_eacces_with_none(struct kunit *const test) { - const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; + const struct layer_access_masks masks = {}; - IE_FALSE(&layer_masks, 0); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); + IE_FALSE(&masks, 0); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } static void test_is_eacces_with_refer(struct kunit *const test) { - const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_REFER)] = BIT_ULL(0), + const struct layer_access_masks masks = { + .access[0] = LANDLOCK_ACCESS_FS_REFER, }; - IE_FALSE(&layer_masks, 0); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); + IE_FALSE(&masks, 0); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } static void test_is_eacces_with_write(struct kunit *const test) { - const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(0), + const struct layer_access_masks masks = { + .access[0] = LANDLOCK_ACCESS_FS_WRITE_FILE, }; - IE_FALSE(&layer_masks, 0); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); + IE_FALSE(&masks, 0); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE); - IE_TRUE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); + IE_TRUE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ @@ -752,26 +738,25 @@ static void test_is_eacces_with_write(struct kunit *const test) * - true if the access request is granted; * - false otherwise. 
*/ -static bool is_access_to_paths_allowed( - const struct landlock_ruleset *const domain, - const struct path *const path, - const access_mask_t access_request_parent1, - layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], - struct landlock_request *const log_request_parent1, - struct dentry *const dentry_child1, - const access_mask_t access_request_parent2, - layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], - struct landlock_request *const log_request_parent2, - struct dentry *const dentry_child2) +static bool +is_access_to_paths_allowed(const struct landlock_ruleset *const domain, + const struct path *const path, + const access_mask_t access_request_parent1, + struct layer_access_masks *layer_masks_parent1, + struct landlock_request *const log_request_parent1, + struct dentry *const dentry_child1, + const access_mask_t access_request_parent2, + struct layer_access_masks *layer_masks_parent2, + struct landlock_request *const log_request_parent2, + struct dentry *const dentry_child2) { bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check, child1_is_directory = true, child2_is_directory = true; struct path walker_path; access_mask_t access_masked_parent1, access_masked_parent2; - layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS], - _layer_masks_child2[LANDLOCK_NUM_ACCESS_FS]; - layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL, - (*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL; + struct layer_access_masks _layer_masks_child1, _layer_masks_child2; + struct layer_access_masks *layer_masks_child1 = NULL, + *layer_masks_child2 = NULL; if (!access_request_parent1 && !access_request_parent2) return true; @@ -811,22 +796,20 @@ static bool is_access_to_paths_allowed( } if (unlikely(dentry_child1)) { - landlock_unmask_layers( - find_rule(domain, dentry_child1), - landlock_init_layer_masks( - domain, LANDLOCK_MASK_ACCESS_FS, - &_layer_masks_child1, LANDLOCK_KEY_INODE), - &_layer_masks_child1, ARRAY_SIZE(_layer_masks_child1)); + if (landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, + &_layer_masks_child1, + LANDLOCK_KEY_INODE)) + landlock_unmask_layers(find_rule(domain, dentry_child1), + &_layer_masks_child1); layer_masks_child1 = &_layer_masks_child1; child1_is_directory = d_is_dir(dentry_child1); } if (unlikely(dentry_child2)) { - landlock_unmask_layers( - find_rule(domain, dentry_child2), - landlock_init_layer_masks( - domain, LANDLOCK_MASK_ACCESS_FS, - &_layer_masks_child2, LANDLOCK_KEY_INODE), - &_layer_masks_child2, ARRAY_SIZE(_layer_masks_child2)); + if (landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, + &_layer_masks_child2, + LANDLOCK_KEY_INODE)) + landlock_unmask_layers(find_rule(domain, dentry_child2), + &_layer_masks_child2); layer_masks_child2 = &_layer_masks_child2; child2_is_directory = d_is_dir(dentry_child2); } @@ -881,16 +864,12 @@ static bool is_access_to_paths_allowed( } rule = find_rule(domain, walker_path.dentry); - allowed_parent1 = allowed_parent1 || - landlock_unmask_layers( - rule, access_masked_parent1, - layer_masks_parent1, - ARRAY_SIZE(*layer_masks_parent1)); - allowed_parent2 = allowed_parent2 || - landlock_unmask_layers( - rule, access_masked_parent2, - layer_masks_parent2, - ARRAY_SIZE(*layer_masks_parent2)); + allowed_parent1 = + allowed_parent1 || + landlock_unmask_layers(rule, layer_masks_parent1); + allowed_parent2 = + allowed_parent2 || + landlock_unmask_layers(rule, layer_masks_parent2); /* Stops when a rule from each layer grants access. 
*/ if (allowed_parent1 && allowed_parent2) @@ -950,8 +929,6 @@ jump_up: log_request_parent1->audit.u.path = *path; log_request_parent1->access = access_masked_parent1; log_request_parent1->layer_masks = layer_masks_parent1; - log_request_parent1->layer_masks_size = - ARRAY_SIZE(*layer_masks_parent1); } if (!allowed_parent2 && log_request_parent2) { @@ -960,8 +937,6 @@ jump_up: log_request_parent2->audit.u.path = *path; log_request_parent2->access = access_masked_parent2; log_request_parent2->layer_masks = layer_masks_parent2; - log_request_parent2->layer_masks_size = - ARRAY_SIZE(*layer_masks_parent2); } #endif /* CONFIG_AUDIT */ @@ -976,7 +951,7 @@ static int current_check_access_path(const struct path *const path, }; const struct landlock_cred_security *const subject = landlock_get_applicable_subject(current_cred(), masks, NULL); - layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; + struct layer_access_masks layer_masks; struct landlock_request request = {}; if (!subject) @@ -1051,12 +1026,11 @@ static access_mask_t maybe_remove(const struct dentry *const dentry) * - true if all the domain access rights are allowed for @dir; * - false if the walk reached @mnt_root. */ -static bool collect_domain_accesses( - const struct landlock_ruleset *const domain, - const struct dentry *const mnt_root, struct dentry *dir, - layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS]) +static bool collect_domain_accesses(const struct landlock_ruleset *const domain, + const struct dentry *const mnt_root, + struct dentry *dir, + struct layer_access_masks *layer_masks_dom) { - unsigned long access_dom; bool ret = false; if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom)) @@ -1064,18 +1038,17 @@ static bool collect_domain_accesses( if (is_nouser_or_private(dir)) return true; - access_dom = landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, - layer_masks_dom, - LANDLOCK_KEY_INODE); + if (!landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, + layer_masks_dom, LANDLOCK_KEY_INODE)) + return true; dget(dir); while (true) { struct dentry *parent_dentry; /* Gets all layers allowing all domain accesses. */ - if (landlock_unmask_layers(find_rule(domain, dir), access_dom, - layer_masks_dom, - ARRAY_SIZE(*layer_masks_dom))) { + if (landlock_unmask_layers(find_rule(domain, dir), + layer_masks_dom)) { /* * Stops when all handled accesses are allowed by at * least one rule in each layer. 
@@ -1163,8 +1136,8 @@ static int current_check_refer_path(struct dentry *const old_dentry, access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; struct dentry *old_parent; - layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, - layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; + struct layer_access_masks layer_masks_parent1 = {}, + layer_masks_parent2 = {}; struct landlock_request request1 = {}, request2 = {}; if (!subject) @@ -1640,7 +1613,7 @@ static bool is_device(const struct file *const file) static int hook_file_open(struct file *const file) { - layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; + struct layer_access_masks layer_masks = {}; access_mask_t open_access_request, full_access_request, allowed_access, optional_access; const struct landlock_cred_security *const subject = @@ -1675,20 +1648,14 @@ static int hook_file_open(struct file *const file) &layer_masks, &request, NULL, 0, NULL, NULL, NULL)) { allowed_access = full_access_request; } else { - unsigned long access_bit; - const unsigned long access_req = full_access_request; - /* * Calculate the actual allowed access rights from layer_masks. - * Add each access right to allowed_access which has not been - * vetoed by any layer. + * Remove the access rights from the full access request which + * are still unfulfilled in any of the layers. */ - allowed_access = 0; - for_each_set_bit(access_bit, &access_req, - ARRAY_SIZE(layer_masks)) { - if (!layer_masks[access_bit]) - allowed_access |= BIT_ULL(access_bit); - } + allowed_access = full_access_request; + for (size_t i = 0; i < ARRAY_SIZE(layer_masks.access); i++) + allowed_access &= ~layer_masks.access[i]; } /* @@ -1700,11 +1667,10 @@ static int hook_file_open(struct file *const file) landlock_file(file)->allowed_access = allowed_access; #ifdef CONFIG_AUDIT landlock_file(file)->deny_masks = landlock_get_deny_masks( - _LANDLOCK_ACCESS_FS_OPTIONAL, optional_access, &layer_masks, - ARRAY_SIZE(layer_masks)); + _LANDLOCK_ACCESS_FS_OPTIONAL, optional_access, &layer_masks); #endif /* CONFIG_AUDIT */ - if ((open_access_request & allowed_access) == open_access_request) + if (access_mask_subset(open_access_request, allowed_access)) return 0; /* Sets access to reflect the actual request. */ diff --git a/security/landlock/limits.h b/security/landlock/limits.h index 65b5ff051674..eb584f47288d 100644 --- a/security/landlock/limits.h +++ b/security/landlock/limits.h @@ -31,7 +31,7 @@ #define LANDLOCK_MASK_SCOPE ((LANDLOCK_LAST_SCOPE << 1) - 1) #define LANDLOCK_NUM_SCOPE __const_hweight64(LANDLOCK_MASK_SCOPE) -#define LANDLOCK_LAST_RESTRICT_SELF LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF +#define LANDLOCK_LAST_RESTRICT_SELF LANDLOCK_RESTRICT_SELF_TSYNC #define LANDLOCK_MASK_RESTRICT_SELF ((LANDLOCK_LAST_RESTRICT_SELF << 1) - 1) /* clang-format on */ diff --git a/security/landlock/net.c b/security/landlock/net.c index e6367e30e5b0..c368649985c5 100644 --- a/security/landlock/net.c +++ b/security/landlock/net.c @@ -47,7 +47,7 @@ static int current_check_access_socket(struct socket *const sock, access_mask_t access_request) { __be16 port; - layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_NET] = {}; + struct layer_access_masks layer_masks = {}; const struct landlock_rule *rule; struct landlock_id id = { .type = LANDLOCK_KEY_NET_PORT, @@ -62,9 +62,6 @@ static int current_check_access_socket(struct socket *const sock, if (!subject) return 0; - if (!sk_is_tcp(sock->sk)) - return 0; - /* Checks for minimal header length to safely read sa_family. 
*/ if (addrlen < offsetofend(typeof(*address), sa_family)) return -EINVAL; @@ -194,8 +191,10 @@ static int current_check_access_socket(struct socket *const sock, access_request = landlock_init_layer_masks(subject->domain, access_request, &layer_masks, LANDLOCK_KEY_NET_PORT); - if (landlock_unmask_layers(rule, access_request, &layer_masks, - ARRAY_SIZE(layer_masks))) + if (!access_request) + return 0; + + if (landlock_unmask_layers(rule, &layer_masks)) return 0; audit_net.family = address->sa_family; @@ -206,7 +205,6 @@ static int current_check_access_socket(struct socket *const sock, .audit.u.net = &audit_net, .access = access_request, .layer_masks = &layer_masks, - .layer_masks_size = ARRAY_SIZE(layer_masks), }); return -EACCES; } @@ -214,16 +212,30 @@ static int current_check_access_socket(struct socket *const sock, static int hook_socket_bind(struct socket *const sock, struct sockaddr *const address, const int addrlen) { + access_mask_t access_request; + + if (sk_is_tcp(sock->sk)) + access_request = LANDLOCK_ACCESS_NET_BIND_TCP; + else + return 0; + return current_check_access_socket(sock, address, addrlen, - LANDLOCK_ACCESS_NET_BIND_TCP); + access_request); } static int hook_socket_connect(struct socket *const sock, struct sockaddr *const address, const int addrlen) { + access_mask_t access_request; + + if (sk_is_tcp(sock->sk)) + access_request = LANDLOCK_ACCESS_NET_CONNECT_TCP; + else + return 0; + return current_check_access_socket(sock, address, addrlen, - LANDLOCK_ACCESS_NET_CONNECT_TCP); + access_request); } static struct security_hook_list landlock_hooks[] __ro_after_init = { diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c index 0a5b0c76b3f7..419b237de635 100644 --- a/security/landlock/ruleset.c +++ b/security/landlock/ruleset.c @@ -612,22 +612,24 @@ landlock_find_rule(const struct landlock_ruleset *const ruleset, return NULL; } -/* - * @layer_masks is read and may be updated according to the access request and - * the matching rule. - * @masks_array_size must be equal to ARRAY_SIZE(*layer_masks). +/** + * landlock_unmask_layers - Remove the access rights in @masks + * which are granted in @rule + * + * Updates the set of (per-layer) unfulfilled access rights @masks + * so that all the access rights granted in @rule are removed from it + * (because they are now fulfilled). + * + * @rule: A rule that grants a set of access rights for each layer + * @masks: A matrix of unfulfilled access rights for each layer * - * Returns true if the request is allowed (i.e. relevant layer masks for the - * request are empty). + * Returns true if the request is allowed (i.e. the access rights granted all + * remaining unfulfilled access rights and masks has no leftover set bits). */ bool landlock_unmask_layers(const struct landlock_rule *const rule, - const access_mask_t access_request, - layer_mask_t (*const layer_masks)[], - const size_t masks_array_size) + struct layer_access_masks *masks) { - size_t layer_level; - - if (!access_request || !layer_masks) + if (!masks) return true; if (!rule) return false; @@ -642,28 +644,18 @@ bool landlock_unmask_layers(const struct landlock_rule *const rule, * by only one rule, but by the union (binary OR) of multiple rules. * E.g. 
/a/b <execute> + /a <read> => /a/b <execute + read> */ - for (layer_level = 0; layer_level < rule->num_layers; layer_level++) { - const struct landlock_layer *const layer = - &rule->layers[layer_level]; - const layer_mask_t layer_bit = BIT_ULL(layer->level - 1); - const unsigned long access_req = access_request; - unsigned long access_bit; - bool is_empty; + for (size_t i = 0; i < rule->num_layers; i++) { + const struct landlock_layer *const layer = &rule->layers[i]; - /* - * Records in @layer_masks which layer grants access to each requested - * access: bit cleared if the related layer grants access. - */ - is_empty = true; - for_each_set_bit(access_bit, &access_req, masks_array_size) { - if (layer->access & BIT_ULL(access_bit)) - (*layer_masks)[access_bit] &= ~layer_bit; - is_empty = is_empty && !(*layer_masks)[access_bit]; - } - if (is_empty) - return true; + /* Clear the bits where the layer in the rule grants access. */ + masks->access[layer->level - 1] &= ~layer->access; + } + + for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) { + if (masks->access[i]) + return false; } - return false; + return true; } typedef access_mask_t @@ -673,13 +665,12 @@ get_access_mask_t(const struct landlock_ruleset *const ruleset, /** * landlock_init_layer_masks - Initialize layer masks from an access request * - * Populates @layer_masks such that for each access right in @access_request, + * Populates @masks such that for each access right in @access_request, * the bits for all the layers are set where this access right is handled. * * @domain: The domain that defines the current restrictions. * @access_request: The requested access rights to check. - * @layer_masks: It must contain %LANDLOCK_NUM_ACCESS_FS or - * %LANDLOCK_NUM_ACCESS_NET elements according to @key_type. + * @masks: Layer access masks to populate. * @key_type: The key type to switch between access masks of different types. * * Returns: An access mask where each access right bit is set which is handled @@ -688,23 +679,20 @@ get_access_mask_t(const struct landlock_ruleset *const ruleset, access_mask_t landlock_init_layer_masks(const struct landlock_ruleset *const domain, const access_mask_t access_request, - layer_mask_t (*const layer_masks)[], + struct layer_access_masks *const masks, const enum landlock_key_type key_type) { access_mask_t handled_accesses = 0; - size_t layer_level, num_access; get_access_mask_t *get_access_mask; switch (key_type) { case LANDLOCK_KEY_INODE: get_access_mask = landlock_get_fs_access_mask; - num_access = LANDLOCK_NUM_ACCESS_FS; break; #if IS_ENABLED(CONFIG_INET) case LANDLOCK_KEY_NET_PORT: get_access_mask = landlock_get_net_access_mask; - num_access = LANDLOCK_NUM_ACCESS_NET; break; #endif /* IS_ENABLED(CONFIG_INET) */ @@ -713,27 +701,18 @@ landlock_init_layer_masks(const struct landlock_ruleset *const domain, return 0; } - memset(layer_masks, 0, - array_size(sizeof((*layer_masks)[0]), num_access)); - /* An empty access request can happen because of O_WRONLY | O_RDWR. */ if (!access_request) return 0; - /* Saves all handled accesses per layer. 
*/ - for (layer_level = 0; layer_level < domain->num_layers; layer_level++) { - const unsigned long access_req = access_request; - const access_mask_t access_mask = - get_access_mask(domain, layer_level); - unsigned long access_bit; - - for_each_set_bit(access_bit, &access_req, num_access) { - if (BIT_ULL(access_bit) & access_mask) { - (*layer_masks)[access_bit] |= - BIT_ULL(layer_level); - handled_accesses |= BIT_ULL(access_bit); - } - } + for (size_t i = 0; i < domain->num_layers; i++) { + const access_mask_t handled = get_access_mask(domain, i); + + masks->access[i] = access_request & handled; + handled_accesses |= masks->access[i]; } + for (size_t i = domain->num_layers; i < ARRAY_SIZE(masks->access); i++) + masks->access[i] = 0; + return handled_accesses; } diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h index 1a78cba662b2..9d6dc632684c 100644 --- a/security/landlock/ruleset.h +++ b/security/landlock/ruleset.h @@ -302,14 +302,12 @@ landlock_get_scope_mask(const struct landlock_ruleset *const ruleset, } bool landlock_unmask_layers(const struct landlock_rule *const rule, - const access_mask_t access_request, - layer_mask_t (*const layer_masks)[], - const size_t masks_array_size); + struct layer_access_masks *masks); access_mask_t landlock_init_layer_masks(const struct landlock_ruleset *const domain, const access_mask_t access_request, - layer_mask_t (*const layer_masks)[], + struct layer_access_masks *masks, const enum landlock_key_type key_type); #endif /* _SECURITY_LANDLOCK_RULESET_H */ diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 0116e9f93ffe..0d66a68677b7 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -36,6 +36,7 @@ #include "net.h" #include "ruleset.h" #include "setup.h" +#include "tsync.h" static bool is_initialized(void) { @@ -157,11 +158,13 @@ static const struct file_operations ruleset_fops = { /* * The Landlock ABI version should be incremented for each new Landlock-related * user space visible change (e.g. Landlock syscalls). This version should - * only be incremented once per Linux release, and the date in + * only be incremented once per Linux release. When incrementing, the date in * Documentation/userspace-api/landlock.rst should be updated to reflect the * UAPI change. + * If the change involves a fix that requires userspace awareness, also update + * the errata documentation in Documentation/userspace-api/landlock.rst . */ -const int landlock_abi_version = 7; +const int landlock_abi_version = 8; /** * sys_landlock_create_ruleset - Create a new ruleset @@ -454,9 +457,10 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF + * - %LANDLOCK_RESTRICT_SELF_TSYNC * - * This system call enables to enforce a Landlock ruleset on the current - * thread. Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its + * This system call enforces a Landlock ruleset on the current thread. + * Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its * namespace or is running with no_new_privs. This avoids scenarios where * unprivileged tasks can affect the behavior of privileged children. 
* @@ -478,8 +482,7 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, flags) { - struct landlock_ruleset *new_dom, - *ruleset __free(landlock_put_ruleset) = NULL; + struct landlock_ruleset *ruleset __free(landlock_put_ruleset) = NULL; struct cred *new_cred; struct landlock_cred_security *new_llcred; bool __maybe_unused log_same_exec, log_new_exec, log_subdomains, @@ -538,33 +541,43 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, * We could optimize this case by not calling commit_creds() if this flag * was already set, but it is not worth the complexity. */ - if (!ruleset) - return commit_creds(new_cred); - - /* - * There is no possible race condition while copying and manipulating - * the current credentials because they are dedicated per thread. - */ - new_dom = landlock_merge_ruleset(new_llcred->domain, ruleset); - if (IS_ERR(new_dom)) { - abort_creds(new_cred); - return PTR_ERR(new_dom); - } + if (ruleset) { + /* + * There is no possible race condition while copying and + * manipulating the current credentials because they are + * dedicated per thread. + */ + struct landlock_ruleset *const new_dom = + landlock_merge_ruleset(new_llcred->domain, ruleset); + if (IS_ERR(new_dom)) { + abort_creds(new_cred); + return PTR_ERR(new_dom); + } #ifdef CONFIG_AUDIT - new_dom->hierarchy->log_same_exec = log_same_exec; - new_dom->hierarchy->log_new_exec = log_new_exec; - if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains) - new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED; + new_dom->hierarchy->log_same_exec = log_same_exec; + new_dom->hierarchy->log_new_exec = log_new_exec; + if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains) + new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED; #endif /* CONFIG_AUDIT */ - /* Replaces the old (prepared) domain. */ - landlock_put_ruleset(new_llcred->domain); - new_llcred->domain = new_dom; + /* Replaces the old (prepared) domain. */ + landlock_put_ruleset(new_llcred->domain); + new_llcred->domain = new_dom; #ifdef CONFIG_AUDIT - new_llcred->domain_exec |= BIT(new_dom->num_layers - 1); + new_llcred->domain_exec |= BIT(new_dom->num_layers - 1); #endif /* CONFIG_AUDIT */ + } + + if (flags & LANDLOCK_RESTRICT_SELF_TSYNC) { + const int err = landlock_restrict_sibling_threads( + current_cred(), new_cred); + if (err) { + abort_creds(new_cred); + return err; + } + } return commit_creds(new_cred); } diff --git a/security/landlock/tsync.c b/security/landlock/tsync.c new file mode 100644 index 000000000000..0d2b9c646030 --- /dev/null +++ b/security/landlock/tsync.c @@ -0,0 +1,561 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Landlock - Cross-thread ruleset enforcement + * + * Copyright © 2025 Google LLC + */ + +#include <linux/atomic.h> +#include <linux/cleanup.h> +#include <linux/completion.h> +#include <linux/cred.h> +#include <linux/errno.h> +#include <linux/overflow.h> +#include <linux/rcupdate.h> +#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/task.h> +#include <linux/slab.h> +#include <linux/task_work.h> + +#include "cred.h" +#include "tsync.h" + +/* + * Shared state between multiple threads which are enforcing Landlock rulesets + * in lockstep with each other. + */ +struct tsync_shared_context { + /* The old and tentative new creds of the calling thread. */ + const struct cred *old_cred; + const struct cred *new_cred; + + /* True if sibling tasks need to set the no_new_privs flag. 
*/ + bool set_no_new_privs; + + /* An error encountered in preparation step, or 0. */ + atomic_t preparation_error; + + /* + * Barrier after preparation step in restrict_one_thread. + * The calling thread waits for completion. + * + * Re-initialized on every round of looking for newly spawned threads. + */ + atomic_t num_preparing; + struct completion all_prepared; + + /* Sibling threads wait for completion. */ + struct completion ready_to_commit; + + /* + * Barrier after commit step (used by syscall impl to wait for + * completion). + */ + atomic_t num_unfinished; + struct completion all_finished; +}; + +struct tsync_work { + struct callback_head work; + struct task_struct *task; + struct tsync_shared_context *shared_ctx; +}; + +/* + * restrict_one_thread - update a thread's Landlock domain in lockstep with the + * other threads in the same process + * + * When this is run, the same function gets run in all other threads in the same + * process (except for the calling thread which called landlock_restrict_self). + * The concurrently running invocations of restrict_one_thread coordinate + * through the shared ctx object to do their work in lockstep to implement + * all-or-nothing semantics for enforcing the new Landlock domain. + * + * Afterwards, depending on the presence of an error, all threads either commit + * or abort the prepared credentials. The commit operation can not fail any + * more. + */ +static void restrict_one_thread(struct tsync_shared_context *ctx) +{ + int err; + struct cred *cred = NULL; + + if (current_cred() == ctx->old_cred) { + /* + * Switch out old_cred with new_cred, if possible. + * + * In the common case, where all threads initially point to the same + * struct cred, this optimization avoids creating separate redundant + * credentials objects for each, which would all have the same contents. + * + * Note: We are intentionally dropping the const qualifier here, because + * it is required by commit_creds() and abort_creds(). + */ + cred = (struct cred *)get_cred(ctx->new_cred); + } else { + /* Else, prepare new creds and populate them. */ + cred = prepare_creds(); + + if (!cred) { + atomic_set(&ctx->preparation_error, -ENOMEM); + + /* + * Even on error, we need to adhere to the protocol and coordinate + * with concurrently running invocations. + */ + if (atomic_dec_return(&ctx->num_preparing) == 0) + complete_all(&ctx->all_prepared); + + goto out; + } + + landlock_cred_copy(landlock_cred(cred), + landlock_cred(ctx->new_cred)); + } + + /* + * Barrier: Wait until all threads are done preparing. + * After this point, we can have no more failures. + */ + if (atomic_dec_return(&ctx->num_preparing) == 0) + complete_all(&ctx->all_prepared); + + /* + * Wait for signal from calling thread that it's safe to read the + * preparation error now and we are ready to commit (or abort). + */ + wait_for_completion(&ctx->ready_to_commit); + + /* Abort the commit if any of the other threads had an error. */ + err = atomic_read(&ctx->preparation_error); + if (err) { + abort_creds(cred); + goto out; + } + + /* + * Make sure that all sibling tasks fulfill the no_new_privs prerequisite. 
+ * (This is in line with Seccomp's SECCOMP_FILTER_FLAG_TSYNC logic in + * kernel/seccomp.c) + */ + if (ctx->set_no_new_privs) + task_set_no_new_privs(current); + + commit_creds(cred); + +out: + /* Notify the calling thread once all threads are done */ + if (atomic_dec_return(&ctx->num_unfinished) == 0) + complete_all(&ctx->all_finished); +} + +/* + * restrict_one_thread_callback - task_work callback for restricting a thread + * + * Calls restrict_one_thread with the struct landlock_shared_tsync_context. + */ +static void restrict_one_thread_callback(struct callback_head *work) +{ + struct tsync_work *ctx = container_of(work, struct tsync_work, work); + + restrict_one_thread(ctx->shared_ctx); +} + +/* + * struct tsync_works - a growable array of per-task contexts + * + * The zero-initialized struct represents the empty array. + */ +struct tsync_works { + struct tsync_work **works; + size_t size; + size_t capacity; +}; + +/* + * tsync_works_provide - provides a preallocated tsync_work for the given task + * + * This also stores a task pointer in the context and increments the reference + * count of the task. + * + * This function may fail in the case where we did not preallocate sufficient + * capacity. This can legitimately happen if new threads get started after we + * grew the capacity. + * + * Returns: + * A pointer to the preallocated context struct, with task filled in. + * + * NULL, if we ran out of preallocated context structs. + */ +static struct tsync_work *tsync_works_provide(struct tsync_works *s, + struct task_struct *task) +{ + struct tsync_work *ctx; + + if (s->size >= s->capacity) + return NULL; + + ctx = s->works[s->size]; + s->size++; + + ctx->task = get_task_struct(task); + return ctx; +} + +/* + * tsync_works_grow_by - preallocates space for n more contexts in s + * + * On a successful return, the subsequent n calls to tsync_works_provide() are + * guaranteed to succeed. (size + n <= capacity) + * + * Returns: + * -ENOMEM if the (re)allocation fails + + * 0 if the allocation succeeds, partially succeeds, or no reallocation + * was needed + */ +static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags) +{ + size_t i; + size_t new_capacity; + struct tsync_work **works; + struct tsync_work *work; + + if (check_add_overflow(s->size, n, &new_capacity)) + return -EOVERFLOW; + + /* No need to reallocate if s already has sufficient capacity. */ + if (new_capacity <= s->capacity) + return 0; + + works = krealloc_array(s->works, new_capacity, sizeof(s->works[0]), + flags); + if (!works) + return -ENOMEM; + + s->works = works; + + for (i = s->capacity; i < new_capacity; i++) { + work = kzalloc(sizeof(*work), flags); + if (!work) { + /* + * Leave the object in a consistent state, + * but return an error. + */ + s->capacity = i; + return -ENOMEM; + } + s->works[i] = work; + } + s->capacity = new_capacity; + return 0; +} + +/* + * tsync_works_contains - checks for presence of task in s + */ +static bool tsync_works_contains_task(const struct tsync_works *s, + struct task_struct *task) +{ + size_t i; + + for (i = 0; i < s->size; i++) + if (s->works[i]->task == task) + return true; + return false; +} + +/* + * tsync_works_release - frees memory held by s and drops all task references + * + * This does not free s itself, only the data structures held by it. 
+ */
+static void tsync_works_release(struct tsync_works *s)
+{
+	size_t i;
+
+	for (i = 0; i < s->size; i++) {
+		if (!s->works[i]->task)
+			continue;
+
+		put_task_struct(s->works[i]->task);
+	}
+
+	for (i = 0; i < s->capacity; i++)
+		kfree(s->works[i]);
+	kfree(s->works);
+	s->works = NULL;
+	s->size = 0;
+	s->capacity = 0;
+}
+
+/*
+ * count_additional_threads - counts the sibling threads that are not in works
+ */
+static size_t count_additional_threads(const struct tsync_works *works)
+{
+	struct task_struct *thread, *caller;
+	size_t n = 0;
+
+	caller = current;
+
+	guard(rcu)();
+
+	for_each_thread(caller, thread) {
+		/* Skip current, since it is initiating the sync. */
+		if (thread == caller)
+			continue;
+
+		/* Skip exited threads. */
+		if (thread->flags & PF_EXITING)
+			continue;
+
+		/* Skip threads that we have already seen. */
+		if (tsync_works_contains_task(works, thread))
+			continue;
+
+		n++;
+	}
+	return n;
+}
+
+/*
+ * schedule_task_work - adds task_work for all eligible sibling threads
+ * which have not been scheduled yet
+ *
+ * For each added task_work, atomically increments shared_ctx->num_preparing and
+ * shared_ctx->num_unfinished.
+ *
+ * Returns:
+ *   true, if at least one eligible sibling thread was found
+ */
+static bool schedule_task_work(struct tsync_works *works,
+			       struct tsync_shared_context *shared_ctx)
+{
+	int err;
+	struct task_struct *thread, *caller;
+	struct tsync_work *ctx;
+	bool found_more_threads = false;
+
+	caller = current;
+
+	guard(rcu)();
+
+	for_each_thread(caller, thread) {
+		/* Skip current, since it is initiating the sync. */
+		if (thread == caller)
+			continue;
+
+		/* Skip exited threads. */
+		if (thread->flags & PF_EXITING)
+			continue;
+
+		/* Skip threads that we already looked at. */
+		if (tsync_works_contains_task(works, thread))
+			continue;
+
+		/*
+		 * We found a sibling thread that is not doing its task_work yet, and
+		 * which might spawn new threads before our task work runs, so we need
+		 * at least one more round in the outer loop.
+		 */
+		found_more_threads = true;
+
+		ctx = tsync_works_provide(works, thread);
+		if (!ctx) {
+			/*
+			 * We ran out of preallocated contexts -- we need to try again with
+			 * this thread at a later time!
+			 * found_more_threads is already true at this point.
+			 */
+			break;
+		}
+
+		ctx->shared_ctx = shared_ctx;
+
+		atomic_inc(&shared_ctx->num_preparing);
+		atomic_inc(&shared_ctx->num_unfinished);
+
+		init_task_work(&ctx->work, restrict_one_thread_callback);
+		err = task_work_add(thread, &ctx->work, TWA_SIGNAL);
+		if (err) {
+			/*
+			 * task_work_add() only fails if the task is about to exit. We
+			 * checked that earlier, but it can happen as a race. Resume
+			 * without setting an error, as the task is probably gone in the
+			 * next loop iteration. For consistency, remove the task from ctx
+			 * so that it does not look like we handed it a task_work.
+			 */
+			put_task_struct(ctx->task);
+			ctx->task = NULL;
+
+			atomic_dec(&shared_ctx->num_preparing);
+			atomic_dec(&shared_ctx->num_unfinished);
+		}
+	}
+
+	return found_more_threads;
+}
+
+/*
+ * cancel_tsync_works - cancel all task works where possible
+ *
+ * Task works can be canceled as long as they are still queued and have not
+ * started running. If they get canceled, we decrement
+ * shared_ctx->num_preparing and shared_ctx->num_unfinished and mark the two
+ * completions if needed, as if the task was never scheduled.
+ */
+static void cancel_tsync_works(struct tsync_works *works,
+			       struct tsync_shared_context *shared_ctx)
+{
+	int i;
+
+	for (i = 0; i < works->size; i++) {
+		if (!task_work_cancel(works->works[i]->task,
+				      &works->works[i]->work))
+			continue;
+
+		/* After dequeueing, act as if the task work had executed. */
+
+		if (atomic_dec_return(&shared_ctx->num_preparing) == 0)
+			complete_all(&shared_ctx->all_prepared);
+
+		if (atomic_dec_return(&shared_ctx->num_unfinished) == 0)
+			complete_all(&shared_ctx->all_finished);
+	}
+}
+
+/*
+ * landlock_restrict_sibling_threads - enables a Landlock policy for all
+ * sibling threads
+ */
+int landlock_restrict_sibling_threads(const struct cred *old_cred,
+				      const struct cred *new_cred)
+{
+	int err;
+	struct tsync_shared_context shared_ctx;
+	struct tsync_works works = {};
+	size_t newly_discovered_threads;
+	bool found_more_threads;
+
+	atomic_set(&shared_ctx.preparation_error, 0);
+	init_completion(&shared_ctx.all_prepared);
+	init_completion(&shared_ctx.ready_to_commit);
+	atomic_set(&shared_ctx.num_unfinished, 1);
+	init_completion(&shared_ctx.all_finished);
+	shared_ctx.old_cred = old_cred;
+	shared_ctx.new_cred = new_cred;
+	shared_ctx.set_no_new_privs = task_no_new_privs(current);
+
+	/*
+	 * We schedule a pseudo-signal task_work for each of the calling task's
+	 * sibling threads. In the task work, each thread:
+	 *
+	 * 1) runs prepare_creds() and writes back the error to
+	 *    shared_ctx.preparation_error, if needed.
+	 *
+	 * 2) signals that it's done with prepare_creds() to the calling task
+	 *    (completion "all_prepared").
+	 *
+	 * 3) waits for the completion "ready_to_commit". This is sent by the
+	 *    calling task after ensuring that all sibling threads are done
+	 *    with the "preparation" stage.
+	 *
+	 *    After this barrier is reached, it's safe to read
+	 *    shared_ctx.preparation_error.
+	 *
+	 * 4) reads shared_ctx.preparation_error and then either does
+	 *    commit_creds() or abort_creds().
+	 *
+	 * 5) signals that it's done altogether (barrier synchronization
+	 *    "all_finished").
+	 *
+	 * Unlike seccomp, which modifies sibling tasks directly, we do not need
+	 * to acquire the cred_guard_mutex and sighand->siglock:
+	 *
+	 * - Since, in our case, all threads exchange their own struct cred
+	 *   themselves through the credentials API, no locks are needed for
+	 *   that.
+	 * - Our for_each_thread() loops are protected by RCU.
+	 * - We do not acquire a lock to keep the list of sibling threads stable
+	 *   between our for_each_thread loops. If the list of available sibling
+	 *   threads changes between these for_each_thread loops, we make up for
+	 *   that by continuing to look for threads until they are all discovered
+	 *   and have entered their task_work, where they are unable to spawn new
+	 *   threads.
+	 */
+	do {
+		/* In RCU read-lock, count the threads we need. */
+		newly_discovered_threads = count_additional_threads(&works);
+
+		if (newly_discovered_threads == 0)
+			break; /* done */
+
+		err = tsync_works_grow_by(&works, newly_discovered_threads,
+					  GFP_KERNEL_ACCOUNT);
+		if (err) {
+			atomic_set(&shared_ctx.preparation_error, err);
+			break;
+		}
+
+		/*
+		 * The "all_prepared" barrier is local to the loop body, i.e. to
+		 * this use of for_each_thread(). We can reset it on each loop
+		 * iteration because all previous loop iterations are done with
+		 * it already.
+		 *
+		 * num_preparing is initialized to 1 so that the counter cannot
+		 * go to 0 and mark the completion as done before all task works
+		 * are registered. We decrement it at the end of the loop body.
+		 */
+		atomic_set(&shared_ctx.num_preparing, 1);
+		reinit_completion(&shared_ctx.all_prepared);
+
+		/*
+		 * In RCU read-lock, schedule task work on newly discovered
+		 * sibling tasks.
+		 */
+		found_more_threads = schedule_task_work(&works, &shared_ctx);
+
+		/*
+		 * Decrement num_preparing for current, undoing the
+		 * initialization to 1 a few lines above.
+		 */
+		if (atomic_dec_return(&shared_ctx.num_preparing) > 0) {
+			if (wait_for_completion_interruptible(
+				    &shared_ctx.all_prepared)) {
+				/* In case of interruption, we need to retry the system call. */
+				atomic_set(&shared_ctx.preparation_error,
+					   -ERESTARTNOINTR);
+
+				/*
+				 * Cancel task works for tasks that did not start running yet,
+				 * and decrement num_preparing and num_unfinished accordingly.
+				 */
+				cancel_tsync_works(&works, &shared_ctx);
+
+				/*
+				 * The remaining task works have started running, so waiting for
+				 * their completion will finish.
+				 */
+				wait_for_completion(&shared_ctx.all_prepared);
+			}
+		}
+	} while (found_more_threads &&
+		 !atomic_read(&shared_ctx.preparation_error));
+
+	/*
+	 * We now have all sibling threads blocking and in "prepared" state in
+	 * the task work. Ask all threads to commit.
+	 */
+	complete_all(&shared_ctx.ready_to_commit);
+
+	/*
+	 * Decrement num_unfinished for current, undoing the initialization to 1
+	 * at the beginning.
+	 */
+	if (atomic_dec_return(&shared_ctx.num_unfinished) > 0)
+		wait_for_completion(&shared_ctx.all_finished);
+
+	tsync_works_release(&works);
+
+	return atomic_read(&shared_ctx.preparation_error);
+}
diff --git a/security/landlock/tsync.h b/security/landlock/tsync.h
new file mode 100644
index 000000000000..ef86bb61c2f6
--- /dev/null
+++ b/security/landlock/tsync.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock - Cross-thread ruleset enforcement
+ *
+ * Copyright © 2025 Google LLC
+ */
+
+#ifndef _SECURITY_LANDLOCK_TSYNC_H
+#define _SECURITY_LANDLOCK_TSYNC_H
+
+#include <linux/cred.h>
+
+int landlock_restrict_sibling_threads(const struct cred *old_cred,
+				      const struct cred *new_cred);
+
+#endif /* _SECURITY_LANDLOCK_TSYNC_H */
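For context, here is a minimal userspace sketch of how the kernel-side landlock_restrict_sibling_threads() above gets exercised. This example is not part of the patch set: the helper name, the ruleset contents, and the error handling are illustrative placeholders. It relies on the pre-existing Landlock UAPI plus the LANDLOCK_RESTRICT_SELF_TSYNC flag added by this series; kernels that do not know the flag reject the final landlock_restrict_self() call.

/*
 * Hypothetical usage sketch (not from this patch): restrict every thread of
 * the current process with a single landlock_restrict_self() call.
 */
#define _GNU_SOURCE
#include <linux/landlock.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int restrict_whole_process(void)
{
	const struct landlock_ruleset_attr attr = {
		/* Placeholder: handle only a couple of filesystem rights. */
		.handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE |
				     LANDLOCK_ACCESS_FS_WRITE_FILE,
	};
	int ruleset_fd;

	ruleset_fd = syscall(__NR_landlock_create_ruleset, &attr,
			     sizeof(attr), 0);
	if (ruleset_fd < 0)
		return -1;

	/* Landlock enforcement requires no_new_privs on the calling thread. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		close(ruleset_fd);
		return -1;
	}

	/*
	 * With LANDLOCK_RESTRICT_SELF_TSYNC, the new domain is applied to all
	 * sibling threads as well, with all-or-nothing semantics (see
	 * landlock_restrict_sibling_threads() above). Older kernels without
	 * this flag fail the call instead of silently ignoring it.
	 */
	if (syscall(__NR_landlock_restrict_self, ruleset_fd,
		    LANDLOCK_RESTRICT_SELF_TSYNC)) {
		close(ruleset_fd);
		return -1;
	}

	close(ruleset_fd);
	return 0;
}

A single call from any one thread is enough: the task_work-based lockstep protocol above either restricts every sibling thread or none of them.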
