From 73e75e6fc352bdca08f7e0893d5b6bb37171bdd2 Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Tue, 21 May 2024 11:21:26 +0200 Subject: cgroup/pids: Separate semantics of pids.events related to pids.max MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, when pids.max limit is breached in the hierarchy, the event is counted and reported in the cgroup where the forking task resides. This decouples the limit and the notification caused by the limit making it hard to detect when the actual limit was effected. Redefine the pids.events:max as: the number of times the limit of the cgroup was hit. (Implementation differentiates also "forkfail" event but this is currently not exposed as it would better fit into pids.stat. It also differs from pids.events:max only when pids.max is configured on non-leaf cgroups.) Since it changes semantics of the original "max" event, introduce this change only in the v2 API of the controller and add a cgroup2 mount option to revert to the legacy behavior. Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ea48c861cd36..b36690ca0d3f 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -119,7 +119,12 @@ enum { /* * Enable hugetlb accounting for the memory controller. */ - CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19), + CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19), + + /* + * Enable legacy local pids.events. 
+ */ + CGRP_ROOT_PIDS_LOCAL_EVENTS = (1 << 20), }; /* cftype->flags */ -- cgit v1.2.3 From 7a4479680d7fd05c7a3efa87b41f421af48fbbdf Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 1 Jul 2024 16:49:37 -0700 Subject: cgroup_misc: add kernel-doc comments for enum misc_res_type Fully document enum misc_res_type with kernel-doc comments to prevent kernel-doc warnings: misc_cgroup.h:12: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Types of misc cgroup entries supported by the host. misc_cgroup.h:12: warning: missing initial short description on line: * Types of misc cgroup entries supported by the host. Fixes: a72232eabdfc ("cgroup: Add misc cgroup controller") Signed-off-by: Randy Dunlap Cc: cgroups@vger.kernel.org Signed-off-by: Tejun Heo --- include/linux/misc_cgroup.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index e799b1f8d05b..d70eab2501ee 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -9,15 +9,16 @@ #define _MISC_CGROUP_H_ /** - * Types of misc cgroup entries supported by the host. + * enum misc_res_type - Types of misc cgroup entries supported by the host. 
*/ enum misc_res_type { #ifdef CONFIG_KVM_AMD_SEV - /* AMD SEV ASIDs resource */ + /** @MISC_CG_RES_SEV: AMD SEV ASIDs resource */ MISC_CG_RES_SEV, - /* AMD SEV-ES ASIDs resource */ + /** @MISC_CG_RES_SEV_ES: AMD SEV-ES ASIDs resource */ MISC_CG_RES_SEV_ES, #endif + /** @MISC_CG_RES_TYPES: count of enum misc_res_type constants */ MISC_CG_RES_TYPES }; -- cgit v1.2.3 From 1028f391d5f9d4248e2f49193e6de2516ad630f8 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Wed, 3 Jul 2024 00:36:46 +0000 Subject: cgroup/misc: Introduce misc.peak Introduce misc.peak to record the historical maximum usage of the resource, as in some scenarios the value of misc.max could be adjusted based on the peak usage of the resource. Signed-off-by: Xiu Jianfeng Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 9 ++++++++ include/linux/misc_cgroup.h | 2 ++ kernel/cgroup/misc.c | 41 +++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+) (limited to 'include') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 2e4e74bea6ef..52763d6b2919 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2642,6 +2642,15 @@ Miscellaneous controller provides 3 interface files. If two misc resources (res_ res_a 3 res_b 0 + misc.peak + A read-only flat-keyed file shown in all cgroups. It shows the + historical maximum usage of the resources in the cgroup and its + children.:: + + $ cat misc.peak + res_a 10 + res_b 8 + misc.max A read-write flat-keyed file shown in the non root cgroups. Allowed maximum usage of the resources in the cgroup and its children.:: diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index d70eab2501ee..618392d41975 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -31,11 +31,13 @@ struct misc_cg; /** * struct misc_res: Per cgroup per misc type resource * @max: Maximum limit on the resource. 
+ * @watermark: Historical maximum usage of the resource. * @usage: Current usage of the resource. * @events: Number of times, the resource limit exceeded. */ struct misc_res { u64 max; + atomic64_t watermark; atomic64_t usage; atomic64_t events; }; diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c index 79a3717a5803..b92daf5d234d 100644 --- a/kernel/cgroup/misc.c +++ b/kernel/cgroup/misc.c @@ -121,6 +121,19 @@ static void misc_cg_cancel_charge(enum misc_res_type type, struct misc_cg *cg, misc_res_name[type]); } +static void misc_cg_update_watermark(struct misc_res *res, u64 new_usage) +{ + u64 old; + + while (true) { + old = atomic64_read(&res->watermark); + if (new_usage <= old) + break; + if (atomic64_cmpxchg(&res->watermark, old, new_usage) == old) + break; + } +} + /** * misc_cg_try_charge() - Try charging the misc cgroup. * @type: Misc res type to charge. @@ -159,6 +172,7 @@ int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, u64 amount) ret = -EBUSY; goto err_charge; } + misc_cg_update_watermark(res, new_usage); } return 0; @@ -307,6 +321,29 @@ static int misc_cg_current_show(struct seq_file *sf, void *v) return 0; } +/** + * misc_cg_peak_show() - Show the peak usage of the misc cgroup. + * @sf: Interface file + * @v: Arguments passed + * + * Context: Any context. + * Return: 0 to denote successful print. + */ +static int misc_cg_peak_show(struct seq_file *sf, void *v) +{ + int i; + u64 watermark; + struct misc_cg *cg = css_misc(seq_css(sf)); + + for (i = 0; i < MISC_CG_RES_TYPES; i++) { + watermark = atomic64_read(&cg->res[i].watermark); + if (READ_ONCE(misc_res_capacity[i]) || watermark) + seq_printf(sf, "%s %llu\n", misc_res_name[i], watermark); + } + + return 0; +} + /** * misc_cg_capacity_show() - Show the total capacity of misc res on the host. 
 * @sf: Interface file @@ -357,6 +394,10 @@ static struct cftype misc_cg_files[] = { .name = "current", .seq_show = misc_cg_current_show, }, + { + .name = "peak", + .seq_show = misc_cg_peak_show, + }, { .name = "capacity", .seq_show = misc_cg_capacity_show, -- cgit v1.2.3 From 6a26f9c68901797261bc145975a02f85be0c1d8f Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Thu, 11 Jul 2024 10:14:57 +0000 Subject: cgroup/misc: Introduce misc.events.local Currently the event counting provided by misc.events is hierarchical, which is not practical if the user is only concerned with events of a specific cgroup. Therefore, introduce misc.events.local to collect events specific to the given cgroup. This is analogous to memory.events.local and pids.events.local. Signed-off-by: Xiu Jianfeng Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 5 +++++ include/linux/misc_cgroup.h | 3 +++ kernel/cgroup/misc.c | 39 ++++++++++++++++++++++++++++----- 3 files changed, 41 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 52763d6b2919..05862f06ed26 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2680,6 +2680,11 @@ Miscellaneous controller provides 3 interface files. If two misc resources (res_ The number of times the cgroup's resource usage was about to go over the max boundary. + misc.events.local + Similar to misc.events but the fields in the file are local to the + cgroup i.e. not hierarchical. The file modified event generated on + this file reflects only the local events. 
+ Migration and Ownership ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index 618392d41975..49eef10c8e59 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -40,6 +40,7 @@ struct misc_res { atomic64_t watermark; atomic64_t usage; atomic64_t events; + atomic64_t events_local; }; /** @@ -53,6 +54,8 @@ struct misc_cg { /* misc.events */ struct cgroup_file events_file; + /* misc.events.local */ + struct cgroup_file events_local_file; struct misc_res res[MISC_CG_RES_TYPES]; }; diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c index b92daf5d234d..0e26068995a6 100644 --- a/kernel/cgroup/misc.c +++ b/kernel/cgroup/misc.c @@ -134,6 +134,17 @@ static void misc_cg_update_watermark(struct misc_res *res, u64 new_usage) } } +static void misc_cg_event(enum misc_res_type type, struct misc_cg *cg) +{ + atomic64_inc(&cg->res[type].events_local); + cgroup_file_notify(&cg->events_local_file); + + for (; parent_misc(cg); cg = parent_misc(cg)) { + atomic64_inc(&cg->res[type].events); + cgroup_file_notify(&cg->events_file); + } +} + /** * misc_cg_try_charge() - Try charging the misc cgroup. * @type: Misc res type to charge. 
@@ -177,10 +188,7 @@ int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, u64 amount) return 0; err_charge: - for (j = i; j; j = parent_misc(j)) { - atomic64_inc(&j->res[type].events); - cgroup_file_notify(&j->events_file); - } + misc_cg_event(type, i); for (j = cg; j != i; j = parent_misc(j)) misc_cg_cancel_charge(type, j, amount); @@ -368,20 +376,33 @@ static int misc_cg_capacity_show(struct seq_file *sf, void *v) return 0; } -static int misc_events_show(struct seq_file *sf, void *v) +static int __misc_events_show(struct seq_file *sf, bool local) { struct misc_cg *cg = css_misc(seq_css(sf)); u64 events; int i; for (i = 0; i < MISC_CG_RES_TYPES; i++) { - events = atomic64_read(&cg->res[i].events); + if (local) + events = atomic64_read(&cg->res[i].events_local); + else + events = atomic64_read(&cg->res[i].events); if (READ_ONCE(misc_res_capacity[i]) || events) seq_printf(sf, "%s.max %llu\n", misc_res_name[i], events); } return 0; } +static int misc_events_show(struct seq_file *sf, void *v) +{ + return __misc_events_show(sf, false); +} + +static int misc_events_local_show(struct seq_file *sf, void *v) +{ + return __misc_events_show(sf, true); +} + /* Misc cgroup interface files */ static struct cftype misc_cg_files[] = { { @@ -409,6 +430,12 @@ static struct cftype misc_cg_files[] = { .file_offset = offsetof(struct misc_cg, events_file), .seq_show = misc_events_show, }, + { + .name = "events.local", + .flags = CFTYPE_NOT_ON_ROOT, + .file_offset = offsetof(struct misc_cg, events_local_file), + .seq_show = misc_events_local_show, + }, {} }; -- cgit v1.2.3