From 03eb578b37659e10bed14c2d9e7cc45dfe24123b Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Dec 2025 09:20:48 -0800 Subject: x86,fs/resctrl: Improve domain type checking Every resctrl resource has a list of domain structures. struct rdt_ctrl_domain and struct rdt_mon_domain both begin with struct rdt_domain_hdr, with rdt_domain_hdr::type used in validity checks before accessing the domain of a particular type. Add the resource id to struct rdt_domain_hdr in preparation for a new monitoring domain structure that will be associated with a new monitoring resource. Improve existing domain validity checks with a new helper domain_header_is_valid() that checks both domain type and resource id. domain_header_is_valid() should be used before every call to container_of() that accesses a domain structure. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/20251217172121.12030-1-tony.luck@intel.com --- include/linux/resctrl.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 54701668b3df..e7c218f8d4f7 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -131,15 +131,24 @@ enum resctrl_domain_type { * @list: all instances of this resource * @id: unique id for this instance * @type: type of this instance + * @rid: resource id for this instance * @cpu_mask: which CPUs share this resource */ struct rdt_domain_hdr { struct list_head list; int id; enum resctrl_domain_type type; + enum resctrl_res_level rid; struct cpumask cpu_mask; }; +static inline bool domain_header_is_valid(struct rdt_domain_hdr *hdr, + enum resctrl_domain_type type, + enum resctrl_res_level rid) +{ + return !WARN_ON_ONCE(hdr->type != type || hdr->rid != rid); +} + /** * struct rdt_ctrl_domain - group of CPUs sharing a resctrl control resource * @hdr: common header for different domain types -- cgit v1.2.3 From 97fec06d35b2c1ce6d80cf3b01bfddd82c720a2d Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Dec 2025 09:20:52 -0800 Subject: x86,fs/resctrl: Refactor domain create/remove using struct rdt_domain_hdr Up until now, all monitoring events were associated with the L3 resource and it made sense to use the L3-specific "struct rdt_mon_domain *" argument to functions operating on domains. Telemetry events will be tied to a new resource with its instances represented by a new domain structure that, just like struct rdt_mon_domain, starts with the generic struct rdt_domain_hdr. Prepare to support domains belonging to different resources by changing the calling convention of functions operating on domains. Pass the generic header and use that to find the domain-specific structure where needed. 
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/20251217172121.12030-1-tony.luck@intel.com --- arch/x86/kernel/cpu/resctrl/core.c | 4 +-- fs/resctrl/ctrlmondata.c | 14 +++++--- fs/resctrl/internal.h | 2 +- fs/resctrl/rdtgroup.c | 69 ++++++++++++++++++++++++++------------ include/linux/resctrl.h | 4 +-- 5 files changed, 63 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 64ed81cbf8bf..1fab4c67d273 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -534,7 +534,7 @@ static void l3_mon_domain_setup(int cpu, int id, struct rdt_resource *r, struct list_add_tail_rcu(&d->hdr.list, add_pos); - err = resctrl_online_mon_domain(r, d); + err = resctrl_online_mon_domain(r, &d->hdr); if (err) { list_del_rcu(&d->hdr.list); synchronize_rcu(); @@ -661,7 +661,7 @@ static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r) d = container_of(hdr, struct rdt_mon_domain, hdr); hw_dom = resctrl_to_arch_mon_dom(d); - resctrl_offline_mon_domain(r, d); + resctrl_offline_mon_domain(r, hdr); list_del_rcu(&hdr->list); synchronize_rcu(); mon_domain_free(hw_dom); diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 905c310de573..3154cdc98a31 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -551,14 +551,21 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id, } void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, - struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, + struct rdt_domain_hdr *hdr, struct rdtgroup *rdtgrp, cpumask_t *cpumask, int evtid, int first) { + struct rdt_mon_domain *d = NULL; int cpu; /* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); + if (hdr) { + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + return; + d = container_of(hdr, struct rdt_mon_domain, hdr); + } + /* * Setup the parameters to pass to mon_event_count() to read the data. */ @@ -653,12 +660,11 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) * the resource to find the domain with "domid". */ hdr = resctrl_find_domain(&r->mon_domains, domid, NULL); - if (!hdr || !domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, resid)) { + if (!hdr) { ret = -ENOENT; goto out; } - d = container_of(hdr, struct rdt_mon_domain, hdr); - mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false); + mon_event_read(&rr, r, hdr, rdtgrp, &hdr->cpu_mask, evtid, false); } checkresult: diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index bff4a54ae333..5e52269b391e 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -362,7 +362,7 @@ void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, - struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, + struct rdt_domain_hdr *hdr, struct rdtgroup *rdtgrp, cpumask_t *cpumask, int evtid, int first); int resctrl_mon_resource_init(void); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 8e39dfda56bc..89ffe54fb0fc 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3229,17 +3229,22 @@ static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subn * when last domain being summed is removed. 
*/ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_mon_domain *d) + struct rdt_domain_hdr *hdr) { struct rdtgroup *prgrp, *crgrp; + struct rdt_mon_domain *d; char subname[32]; bool snc_mode; char name[32]; + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + return; + + d = container_of(hdr, struct rdt_mon_domain, hdr); snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id); + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : hdr->id); if (snc_mode) - sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id); + sprintf(subname, "mon_sub_%s_%02d", r->name, hdr->id); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname); @@ -3249,15 +3254,20 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, } } -static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, +static int mon_add_all_files(struct kernfs_node *kn, struct rdt_domain_hdr *hdr, struct rdt_resource *r, struct rdtgroup *prgrp, bool do_sum) { struct rmid_read rr = {0}; + struct rdt_mon_domain *d; struct mon_data *priv; struct mon_evt *mevt; int ret, domid; + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + return -EINVAL; + + d = container_of(hdr, struct rdt_mon_domain, hdr); for_each_mon_event(mevt) { if (mevt->rid != r->rid || !mevt->enabled) continue; @@ -3271,23 +3281,28 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, return ret; if (!do_sum && resctrl_is_mbm_event(mevt->evtid)) - mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true); + mon_event_read(&rr, r, hdr, prgrp, &hdr->cpu_mask, mevt->evtid, true); } return 0; } static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, - struct rdt_mon_domain *d, + struct rdt_domain_hdr *hdr, struct rdt_resource *r, struct rdtgroup *prgrp) { struct kernfs_node *kn, *ckn; + struct rdt_mon_domain *d; char name[32]; bool snc_mode; int ret = 0; lockdep_assert_held(&rdtgroup_mutex); + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + return -EINVAL; + + d = container_of(hdr, struct rdt_mon_domain, hdr); snc_mode = r->mon_scope == RESCTRL_L3_NODE; sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id); kn = kernfs_find_and_get(parent_kn, name); @@ -3305,13 +3320,13 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; - ret = mon_add_all_files(kn, d, r, prgrp, snc_mode); + ret = mon_add_all_files(kn, hdr, r, prgrp, snc_mode); if (ret) goto out_destroy; } if (snc_mode) { - sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id); + sprintf(name, "mon_sub_%s_%02d", r->name, hdr->id); ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp); if (IS_ERR(ckn)) { ret = -EINVAL; @@ -3322,7 +3337,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, if (ret) goto out_destroy; - ret = mon_add_all_files(ckn, d, r, prgrp, false); + ret = mon_add_all_files(ckn, hdr, r, prgrp, false); if (ret) goto out_destroy; } @@ -3340,7 +3355,7 @@ out_destroy: * and "monitor" groups with given domain id. 
*/ static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_mon_domain *d) + struct rdt_domain_hdr *hdr) { struct kernfs_node *parent_kn; struct rdtgroup *prgrp, *crgrp; @@ -3348,12 +3363,12 @@ static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { parent_kn = prgrp->mon.mon_data_kn; - mkdir_mondata_subdir(parent_kn, d, r, prgrp); + mkdir_mondata_subdir(parent_kn, hdr, r, prgrp); head = &prgrp->mon.crdtgrp_list; list_for_each_entry(crgrp, head, mon.crdtgrp_list) { parent_kn = crgrp->mon.mon_data_kn; - mkdir_mondata_subdir(parent_kn, d, r, crgrp); + mkdir_mondata_subdir(parent_kn, hdr, r, crgrp); } } } @@ -3362,14 +3377,14 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, struct rdt_resource *r, struct rdtgroup *prgrp) { - struct rdt_mon_domain *dom; + struct rdt_domain_hdr *hdr; int ret; /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - list_for_each_entry(dom, &r->mon_domains, hdr.list) { - ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp); + list_for_each_entry(hdr, &r->mon_domains, list) { + ret = mkdir_mondata_subdir(parent_kn, hdr, r, prgrp); if (ret) return ret; } @@ -4253,16 +4268,23 @@ void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain mutex_unlock(&rdtgroup_mutex); } -void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr) { + struct rdt_mon_domain *d; + mutex_lock(&rdtgroup_mutex); + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + goto out_unlock; + + d = container_of(hdr, struct rdt_mon_domain, hdr); + /* * If resctrl is mounted, remove all the * per domain monitor data directories. */ if (resctrl_mounted && resctrl_arch_mon_capable()) - rmdir_mondata_subdir_allrdtgrp(r, d); + rmdir_mondata_subdir_allrdtgrp(r, hdr); if (resctrl_is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); @@ -4280,7 +4302,7 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d } domain_destroy_mon_state(d); - +out_unlock: mutex_unlock(&rdtgroup_mutex); } @@ -4353,12 +4375,17 @@ int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d return err; } -int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr) { - int err; + struct rdt_mon_domain *d; + int err = -EINVAL; mutex_lock(&rdtgroup_mutex); + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + goto out_unlock; + + d = container_of(hdr, struct rdt_mon_domain, hdr); err = domain_setup_mon_state(r, d); if (err) goto out_unlock; @@ -4379,7 +4406,7 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) * If resctrl is mounted, add per domain monitor data directories. 
*/ if (resctrl_mounted && resctrl_arch_mon_capable()) - mkdir_mondata_subdir_allrdtgrp(r, d); + mkdir_mondata_subdir_allrdtgrp(r, hdr); out_unlock: mutex_unlock(&rdtgroup_mutex); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index e7c218f8d4f7..5db37c7e89c5 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -507,9 +507,9 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type); int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d); -int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d); +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr); void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d); -void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d); +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr); void resctrl_online_cpu(unsigned int cpu); void resctrl_offline_cpu(unsigned int cpu); -- cgit v1.2.3 From 6b10cf7b6ea857cdf9570e21c077a05803f60575 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Dec 2025 09:20:54 -0800 Subject: x86,fs/resctrl: Use struct rdt_domain_hdr when reading counters Convert the whole call sequence from mon_event_read() to resctrl_arch_rmid_read() to pass resource independent struct rdt_domain_hdr instead of an L3 specific domain structure to prepare for monitoring events in other resources. This additional layer of indirection obscures which aspects of event counting depend on a valid domain. Event initialization, support for assignable counters, and normal event counting implicitly depend on a valid domain while summing of domains does not. Split summing domains from the core event counting handling to make their respective dependencies obvious. 
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/20251217172121.12030-1-tony.luck@intel.com --- arch/x86/kernel/cpu/resctrl/monitor.c | 12 +++-- fs/resctrl/ctrlmondata.c | 9 +--- fs/resctrl/internal.h | 18 ++++---- fs/resctrl/monitor.c | 85 +++++++++++++++++++++++------------ include/linux/resctrl.h | 4 +- 5 files changed, 78 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index dffcc8307500..3da970ea1903 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -238,19 +238,25 @@ static u64 get_corrected_val(struct rdt_resource *r, struct rdt_mon_domain *d, return chunks * hw_res->mon_scale; } -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, u32 unused, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *ignored) { - struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); - int cpu = cpumask_any(&d->hdr.cpu_mask); + struct rdt_hw_mon_domain *hw_dom; struct arch_mbm_state *am; + struct rdt_mon_domain *d; u64 msr_val; u32 prmid; + int cpu; int ret; resctrl_arch_rmid_read_context_check(); + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + return -EINVAL; + d = container_of(hdr, struct rdt_mon_domain, hdr); + hw_dom = resctrl_to_arch_mon_dom(d); + cpu = cpumask_any(&hdr->cpu_mask); prmid = logical_rmid_to_physical_rmid(cpu, rmid); ret = __rmid_read_phys(prmid, eventid, &msr_val); diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 3154cdc98a31..9242a2982e77 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -554,25 +554,18 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, struct rdt_domain_hdr *hdr, struct rdtgroup *rdtgrp, cpumask_t *cpumask, int evtid, int first) { - struct rdt_mon_domain *d = NULL; int cpu; /* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - if (hdr) { - if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) - return; - d = container_of(hdr, struct rdt_mon_domain, hdr); - } - /* * Setup the parameters to pass to mon_event_count() to read the data. */ rr->rgrp = rdtgrp; rr->evtid = evtid; rr->r = r; - rr->d = d; + rr->hdr = hdr; rr->first = first; if (resctrl_arch_mbm_cntr_assign_enabled(r) && resctrl_is_mbm_event(evtid)) { diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 5e52269b391e..9912b774a580 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -106,24 +106,26 @@ struct mon_data { * resource group then its event count is summed with the count from all * its child resource groups. * @r: Resource describing the properties of the event being read. - * @d: Domain that the counter should be read from. If NULL then sum all - * domains in @r sharing L3 @ci.id + * @hdr: Header of domain that the counter should be read from. If NULL then + * sum all domains in @r sharing L3 @ci.id * @evtid: Which monitor event to read. * @first: Initialize MBM counter when true. - * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains. + * @ci: Cacheinfo for L3. Only set when @hdr is NULL. Used when summing + * domains. * @is_mbm_cntr: true if "mbm_event" counter assignment mode is enabled and it * is an MBM event. * @err: Error encountered when reading counter. 
- * @val: Returned value of event counter. If @rgrp is a parent resource group, - * @val includes the sum of event counts from its child resource groups. - * If @d is NULL, @val includes the sum of all domains in @r sharing @ci.id, - * (summed across child resource groups if @rgrp is a parent resource group). + * @val: Returned value of event counter. If @rgrp is a parent resource + * group, @val includes the sum of event counts from its child + * resource groups. If @hdr is NULL, @val includes the sum of all + * domains in @r sharing @ci.id, (summed across child resource groups + * if @rgrp is a parent resource group). * @arch_mon_ctx: Hardware monitor allocated for this read request (MPAM only). */ struct rmid_read { struct rdtgroup *rgrp; struct rdt_resource *r; - struct rdt_mon_domain *d; + struct rdt_domain_hdr *hdr; enum resctrl_event_id evtid; bool first; struct cacheinfo *ci; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index b5e0db38c8bf..e1c12201388f 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -159,7 +159,7 @@ void __check_limbo(struct rdt_mon_domain *d, bool force_free) break; entry = __rmid_entry(idx); - if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid, + if (resctrl_arch_rmid_read(r, &d->hdr, entry->closid, entry->rmid, QOS_L3_OCCUP_EVENT_ID, &val, arch_mon_ctx)) { rmid_dirty = true; @@ -421,11 +421,16 @@ static int __l3_mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) struct rdt_mon_domain *d; int cntr_id = -ENOENT; struct mbm_state *m; - int err, ret; u64 tval = 0; + if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) { + rr->err = -EIO; + return -EINVAL; + } + d = container_of(rr->hdr, struct rdt_mon_domain, hdr); + if (rr->is_mbm_cntr) { - cntr_id = mbm_cntr_get(rr->r, rr->d, rdtgrp, rr->evtid); + cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evtid); if (cntr_id < 0) { rr->err = -ENOENT; return -EINVAL; @@ -434,31 +439,50 @@ static int __l3_mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) if (rr->first) { if (rr->is_mbm_cntr) - resctrl_arch_reset_cntr(rr->r, rr->d, closid, rmid, cntr_id, rr->evtid); + resctrl_arch_reset_cntr(rr->r, d, closid, rmid, cntr_id, rr->evtid); else - resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid); - m = get_mbm_state(rr->d, closid, rmid, rr->evtid); + resctrl_arch_reset_rmid(rr->r, d, closid, rmid, rr->evtid); + m = get_mbm_state(d, closid, rmid, rr->evtid); if (m) memset(m, 0, sizeof(struct mbm_state)); return 0; } - if (rr->d) { - /* Reading a single domain, must be on a CPU in that domain. */ - if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask)) - return -EINVAL; - if (rr->is_mbm_cntr) - rr->err = resctrl_arch_cntr_read(rr->r, rr->d, closid, rmid, cntr_id, - rr->evtid, &tval); - else - rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, - rr->evtid, &tval, rr->arch_mon_ctx); - if (rr->err) - return rr->err; + /* Reading a single domain, must be on a CPU in that domain. 
*/ + if (!cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) + return -EINVAL; + if (rr->is_mbm_cntr) + rr->err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id, + rr->evtid, &tval); + else + rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, closid, rmid, + rr->evtid, &tval, rr->arch_mon_ctx); + if (rr->err) + return rr->err; - rr->val += tval; + rr->val += tval; - return 0; + return 0; +} + +static int __l3_mon_event_count_sum(struct rdtgroup *rdtgrp, struct rmid_read *rr) +{ + int cpu = smp_processor_id(); + u32 closid = rdtgrp->closid; + u32 rmid = rdtgrp->mon.rmid; + struct rdt_mon_domain *d; + u64 tval = 0; + int err, ret; + + /* + * Summing across domains is only done for systems that implement + * Sub-NUMA Cluster. There is no overlap with systems that support + * assignable counters. + */ + if (rr->is_mbm_cntr) { + pr_warn_once("Summing domains using assignable counters is not supported\n"); + rr->err = -EINVAL; + return -EINVAL; } /* Summing domains that share a cache, must be on a CPU for that cache. */ @@ -476,12 +500,8 @@ static int __l3_mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { if (d->ci_id != rr->ci->id) continue; - if (rr->is_mbm_cntr) - err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id, - rr->evtid, &tval); - else - err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, - rr->evtid, &tval, rr->arch_mon_ctx); + err = resctrl_arch_rmid_read(rr->r, &d->hdr, closid, rmid, + rr->evtid, &tval, rr->arch_mon_ctx); if (!err) { rr->val += tval; ret = 0; @@ -498,7 +518,10 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) { switch (rr->r->rid) { case RDT_RESOURCE_L3: - return __l3_mon_event_count(rdtgrp, rr); + if (rr->hdr) + return __l3_mon_event_count(rdtgrp, rr); + else + return __l3_mon_event_count_sum(rdtgrp, rr); default: rr->err = -EINVAL; return -EINVAL; @@ -522,9 +545,13 @@ static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) u64 cur_bw, bytes, cur_bytes; u32 closid = rdtgrp->closid; u32 rmid = rdtgrp->mon.rmid; + struct rdt_mon_domain *d; struct mbm_state *m; - m = get_mbm_state(rr->d, closid, rmid, rr->evtid); + if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + return; + d = container_of(rr->hdr, struct rdt_mon_domain, hdr); + m = get_mbm_state(d, closid, rmid, rr->evtid); if (WARN_ON_ONCE(!m)) return; @@ -697,7 +724,7 @@ static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain * struct rmid_read rr = {0}; rr.r = r; - rr.d = d; + rr.hdr = &d->hdr; rr.evtid = evtid; if (resctrl_arch_mbm_cntr_assign_enabled(r)) { rr.is_mbm_cntr = true; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 5db37c7e89c5..9b9877fb3238 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -517,7 +517,7 @@ void resctrl_offline_cpu(unsigned int cpu); * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid * for this resource and domain. * @r: resource that the counter should be read from. - * @d: domain that the counter should be read from. + * @hdr: Header of domain that the counter should be read from. * @closid: closid that matches the rmid. Depending on the architecture, the * counter may match traffic of both @closid and @rmid, or @rmid * only. @@ -538,7 +538,7 @@ void resctrl_offline_cpu(unsigned int cpu); * Return: * 0 on success, or -EIO, -EINVAL etc on error. 
*/ -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, u32 closid, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *arch_mon_ctx); -- cgit v1.2.3 From 4bc3ef46ff41d5e7ba557e56e9cd2031527cd7f8 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Dec 2025 09:20:55 -0800 Subject: x86,fs/resctrl: Rename struct rdt_mon_domain and rdt_hw_mon_domain The upcoming telemetry event monitoring is not tied to the L3 resource and will have a new domain structure. Rename the L3 resource specific domain data structures to include "l3_" in their names to avoid confusion between the different resource specific domain structures: rdt_mon_domain -> rdt_l3_mon_domain rdt_hw_mon_domain -> rdt_hw_l3_mon_domain No functional change. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/20251217172121.12030-1-tony.luck@intel.com --- arch/x86/kernel/cpu/resctrl/core.c | 14 +++---- arch/x86/kernel/cpu/resctrl/internal.h | 16 ++++---- arch/x86/kernel/cpu/resctrl/monitor.c | 36 ++++++++--------- fs/resctrl/ctrlmondata.c | 2 +- fs/resctrl/internal.h | 8 ++-- fs/resctrl/monitor.c | 70 +++++++++++++++++----------------- fs/resctrl/rdtgroup.c | 40 +++++++++---------- include/linux/resctrl.h | 22 +++++------ 8 files changed, 104 insertions(+), 104 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 1fab4c67d273..cc1b846f9645 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -368,7 +368,7 @@ static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) kfree(hw_dom); } -static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom) +static void mon_domain_free(struct rdt_hw_l3_mon_domain *hw_dom) { int idx; @@ -405,7 +405,7 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain * * @num_rmid: The size of the MBM counter array * @hw_dom: The domain that owns the allocated arrays */ -static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom) +static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_l3_mon_domain *hw_dom) { size_t tsize = sizeof(*hw_dom->arch_mbm_states[0]); enum resctrl_event_id eventid; @@ -503,8 +503,8 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) static void l3_mon_domain_setup(int cpu, int id, struct rdt_resource *r, struct list_head *add_pos) { - struct rdt_hw_mon_domain *hw_dom; - struct rdt_mon_domain *d; + struct rdt_hw_l3_mon_domain *hw_dom; + struct rdt_l3_mon_domain *d; struct cacheinfo *ci; int err; @@ -653,13 +653,13 @@ static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r) switch (r->rid) { case RDT_RESOURCE_L3: { - struct rdt_hw_mon_domain *hw_dom; - struct rdt_mon_domain *d; + struct rdt_hw_l3_mon_domain *hw_dom; + struct rdt_l3_mon_domain *d; if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) return; - d = container_of(hdr, struct rdt_mon_domain, hdr); + d = container_of(hdr, struct rdt_l3_mon_domain, hdr); hw_dom = resctrl_to_arch_mon_dom(d); resctrl_offline_mon_domain(r, hdr); list_del_rcu(&hdr->list); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 4a916c84a322..d73c0adf1026 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -63,17 +63,17 @@ struct rdt_hw_ctrl_domain { }; /** - * 
struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share - * a resource for a monitor function - * @d_resctrl: Properties exposed to the resctrl file system + * struct rdt_hw_l3_mon_domain - Arch private attributes of a set of CPUs sharing + * RDT_RESOURCE_L3 monitoring + * @d_resctrl: Properties exposed to the resctrl file system * @arch_mbm_states: Per-event pointer to the MBM event's saved state. * An MBM event's state is an array of struct arch_mbm_state * indexed by RMID on x86. * * Members of this structure are accessed via helpers that provide abstraction. */ -struct rdt_hw_mon_domain { - struct rdt_mon_domain d_resctrl; +struct rdt_hw_l3_mon_domain { + struct rdt_l3_mon_domain d_resctrl; struct arch_mbm_state *arch_mbm_states[QOS_NUM_L3_MBM_EVENTS]; }; @@ -82,9 +82,9 @@ static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctr return container_of(r, struct rdt_hw_ctrl_domain, d_resctrl); } -static inline struct rdt_hw_mon_domain *resctrl_to_arch_mon_dom(struct rdt_mon_domain *r) +static inline struct rdt_hw_l3_mon_domain *resctrl_to_arch_mon_dom(struct rdt_l3_mon_domain *r) { - return container_of(r, struct rdt_hw_mon_domain, d_resctrl); + return container_of(r, struct rdt_hw_l3_mon_domain, d_resctrl); } /** @@ -140,7 +140,7 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r extern struct rdt_hw_resource rdt_resources_all[]; -void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d); +void arch_mon_domain_online(struct rdt_resource *r, struct rdt_l3_mon_domain *d); /* CPUID.(EAX=10H, ECX=ResID=1).EAX */ union cpuid_0x10_1_eax { diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 3da970ea1903..04b8f1e1f314 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -109,7 +109,7 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val) * * In RMID sharing mode there are fewer "logical RMID" values available * to accumulate data ("physical RMIDs" are divided evenly between SNC - * nodes that share an L3 cache). Linux creates an rdt_mon_domain for + * nodes that share an L3 cache). Linux creates an rdt_l3_mon_domain for * each SNC node. * * The value loaded into IA32_PQR_ASSOC is the "logical RMID". @@ -157,7 +157,7 @@ static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val) return 0; } -static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom, +static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_l3_mon_domain *hw_dom, u32 rmid, enum resctrl_event_id eventid) { @@ -171,11 +171,11 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do return state ? &state[rmid] : NULL; } -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid) { - struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct rdt_hw_l3_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); int cpu = cpumask_any(&d->hdr.cpu_mask); struct arch_mbm_state *am; u32 prmid; @@ -194,9 +194,9 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, * Assumes that hardware counters are also reset and thus that there is * no need to record initial non-zero counts. 
*/ -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d) { - struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct rdt_hw_l3_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); enum resctrl_event_id eventid; int idx; @@ -217,10 +217,10 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) return chunks >> shift; } -static u64 get_corrected_val(struct rdt_resource *r, struct rdt_mon_domain *d, +static u64 get_corrected_val(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 rmid, enum resctrl_event_id eventid, u64 msr_val) { - struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct rdt_hw_l3_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct arch_mbm_state *am; u64 chunks; @@ -242,9 +242,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, u32 unused, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *ignored) { - struct rdt_hw_mon_domain *hw_dom; + struct rdt_hw_l3_mon_domain *hw_dom; + struct rdt_l3_mon_domain *d; struct arch_mbm_state *am; - struct rdt_mon_domain *d; u64 msr_val; u32 prmid; int cpu; @@ -254,7 +254,7 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) return -EINVAL; - d = container_of(hdr, struct rdt_mon_domain, hdr); + d = container_of(hdr, struct rdt_l3_mon_domain, hdr); hw_dom = resctrl_to_arch_mon_dom(d); cpu = cpumask_any(&hdr->cpu_mask); prmid = logical_rmid_to_physical_rmid(cpu, rmid); @@ -308,11 +308,11 @@ static int __cntr_id_read(u32 cntr_id, u64 *val) return 0; } -void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 unused, u32 rmid, int cntr_id, enum resctrl_event_id eventid) { - struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct rdt_hw_l3_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); struct arch_mbm_state *am; am = get_arch_mbm_state(hw_dom, rmid, eventid); @@ -324,7 +324,7 @@ void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, } } -int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 unused, u32 rmid, int cntr_id, enum resctrl_event_id eventid, u64 *val) { @@ -354,7 +354,7 @@ int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, * must adjust RMID counter numbers based on SNC node. See * logical_rmid_to_physical_rmid() for code that does this. */ -void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d) +void arch_mon_domain_online(struct rdt_resource *r, struct rdt_l3_mon_domain *d) { if (snc_nodes_per_l3_cache > 1) msr_clear_bit(MSR_RMID_SNC_CONFIG, 0); @@ -516,7 +516,7 @@ static void resctrl_abmc_set_one_amd(void *arg) */ static void _resctrl_abmc_enable(struct rdt_resource *r, bool enable) { - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; lockdep_assert_cpus_held(); @@ -555,11 +555,11 @@ static void resctrl_abmc_config_one_amd(void *info) /* * Send an IPI to the domain to assign the counter to RMID, event pair. 
*/ -void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, enum resctrl_event_id evtid, u32 rmid, u32 closid, u32 cntr_id, bool assign) { - struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct rdt_hw_l3_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); union l3_qos_abmc_cfg abmc_cfg = { 0 }; struct arch_mbm_state *am; diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 9242a2982e77..a3c734fe656e 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -600,9 +600,9 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) struct kernfs_open_file *of = m->private; enum resctrl_res_level resid; enum resctrl_event_id evtid; + struct rdt_l3_mon_domain *d; struct rdt_domain_hdr *hdr; struct rmid_read rr = {0}; - struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; int domid, cpu, ret = 0; struct rdt_resource *r; diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 9912b774a580..af47b6ddef62 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -369,7 +369,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, int resctrl_mon_resource_init(void); -void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, +void mbm_setup_overflow_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms, int exclude_cpu); @@ -377,14 +377,14 @@ void mbm_handle_overflow(struct work_struct *work); bool is_mba_sc(struct rdt_resource *r); -void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, +void cqm_setup_limbo_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms, int exclude_cpu); void cqm_handle_limbo(struct work_struct *work); -bool has_busy_rmid(struct rdt_mon_domain *d); +bool has_busy_rmid(struct rdt_l3_mon_domain *d); -void __check_limbo(struct rdt_mon_domain *d, bool force_free); +void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free); void resctrl_file_fflags_init(const char *config, unsigned long fflags); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index e1c12201388f..9edbe9805d33 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -130,7 +130,7 @@ static void limbo_release_entry(struct rmid_entry *entry) * decrement the count. 
If the busy count gets to zero on an RMID, we * free the RMID */ -void __check_limbo(struct rdt_mon_domain *d, bool force_free) +void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); u32 idx_limit = resctrl_arch_system_num_rmid_idx(); @@ -188,7 +188,7 @@ void __check_limbo(struct rdt_mon_domain *d, bool force_free) resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx); } -bool has_busy_rmid(struct rdt_mon_domain *d) +bool has_busy_rmid(struct rdt_l3_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); @@ -289,7 +289,7 @@ int alloc_rmid(u32 closid) static void add_rmid_to_limbo(struct rmid_entry *entry) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; u32 idx; lockdep_assert_held(&rdtgroup_mutex); @@ -342,7 +342,7 @@ void free_rmid(u32 closid, u32 rmid) list_add_tail(&entry->list, &rmid_free_lru); } -static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid, +static struct mbm_state *get_mbm_state(struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id evtid) { u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid); @@ -362,7 +362,7 @@ static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid, * Return: * Valid counter ID on success, or -ENOENT on failure. */ -static int mbm_cntr_get(struct rdt_resource *r, struct rdt_mon_domain *d, +static int mbm_cntr_get(struct rdt_resource *r, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { int cntr_id; @@ -389,7 +389,7 @@ static int mbm_cntr_get(struct rdt_resource *r, struct rdt_mon_domain *d, * Return: * Valid counter ID on success, or -ENOSPC on failure. */ -static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d, +static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { int cntr_id; @@ -408,7 +408,7 @@ static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d, /* * mbm_cntr_free() - Clear the counter ID configuration details in the domain @d. 
*/ -static void mbm_cntr_free(struct rdt_mon_domain *d, int cntr_id) +static void mbm_cntr_free(struct rdt_l3_mon_domain *d, int cntr_id) { memset(&d->cntr_cfg[cntr_id], 0, sizeof(*d->cntr_cfg)); } @@ -418,7 +418,7 @@ static int __l3_mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) int cpu = smp_processor_id(); u32 closid = rdtgrp->closid; u32 rmid = rdtgrp->mon.rmid; - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; int cntr_id = -ENOENT; struct mbm_state *m; u64 tval = 0; @@ -427,7 +427,7 @@ static int __l3_mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) rr->err = -EIO; return -EINVAL; } - d = container_of(rr->hdr, struct rdt_mon_domain, hdr); + d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr); if (rr->is_mbm_cntr) { cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evtid); @@ -470,7 +470,7 @@ static int __l3_mon_event_count_sum(struct rdtgroup *rdtgrp, struct rmid_read *r int cpu = smp_processor_id(); u32 closid = rdtgrp->closid; u32 rmid = rdtgrp->mon.rmid; - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; u64 tval = 0; int err, ret; @@ -545,12 +545,12 @@ static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) u64 cur_bw, bytes, cur_bytes; u32 closid = rdtgrp->closid; u32 rmid = rdtgrp->mon.rmid; - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; struct mbm_state *m; if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) return; - d = container_of(rr->hdr, struct rdt_mon_domain, hdr); + d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr); m = get_mbm_state(d, closid, rmid, rr->evtid); if (WARN_ON_ONCE(!m)) return; @@ -650,7 +650,7 @@ static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, * throttle MSRs already have low percentage values. To avoid * unnecessarily restricting such rdtgroups, we also increase the bandwidth. */ -static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) +static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_l3_mon_domain *dom_mbm) { u32 closid, rmid, cur_msr_val, new_msr_val; struct mbm_state *pmbm_data, *cmbm_data; @@ -718,7 +718,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val); } -static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d, +static void mbm_update_one_event(struct rdt_resource *r, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { struct rmid_read rr = {0}; @@ -750,7 +750,7 @@ static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain * resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); } -static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, +static void mbm_update(struct rdt_resource *r, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp) { /* @@ -771,12 +771,12 @@ static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, void cqm_handle_limbo(struct work_struct *work) { unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL); - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; cpus_read_lock(); mutex_lock(&rdtgroup_mutex); - d = container_of(work, struct rdt_mon_domain, cqm_limbo.work); + d = container_of(work, struct rdt_l3_mon_domain, cqm_limbo.work); __check_limbo(d, false); @@ -799,7 +799,7 @@ void cqm_handle_limbo(struct work_struct *work) * @exclude_cpu: Which CPU the handler should not run on, * RESCTRL_PICK_ANY_CPU to pick any CPU. 
*/ -void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, +void cqm_setup_limbo_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms, int exclude_cpu) { unsigned long delay = msecs_to_jiffies(delay_ms); @@ -816,7 +816,7 @@ void mbm_handle_overflow(struct work_struct *work) { unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL); struct rdtgroup *prgrp, *crgrp; - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; struct list_head *head; struct rdt_resource *r; @@ -831,7 +831,7 @@ void mbm_handle_overflow(struct work_struct *work) goto out_unlock; r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - d = container_of(work, struct rdt_mon_domain, mbm_over.work); + d = container_of(work, struct rdt_l3_mon_domain, mbm_over.work); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { mbm_update(r, d, prgrp); @@ -865,7 +865,7 @@ out_unlock: * @exclude_cpu: Which CPU the handler should not run on, * RESCTRL_PICK_ANY_CPU to pick any CPU. */ -void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, +void mbm_setup_overflow_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms, int exclude_cpu) { unsigned long delay = msecs_to_jiffies(delay_ms); @@ -1120,7 +1120,7 @@ out_unlock: * mbm_cntr_free_all() - Clear all the counter ID configuration details in the * domain @d. Called when mbm_assign_mode is changed. */ -static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_mon_domain *d) +static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d) { memset(d->cntr_cfg, 0, sizeof(*d->cntr_cfg) * r->mon.num_mbm_cntrs); } @@ -1129,7 +1129,7 @@ static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_mon_domain *d) * resctrl_reset_rmid_all() - Reset all non-architecture states for all the * supported RMIDs. */ -static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) +static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); enum resctrl_event_id evt; @@ -1150,7 +1150,7 @@ static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain * Assign the counter if @assign is true else unassign the counter. Reset the * associated non-architectural state. */ -static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, enum resctrl_event_id evtid, u32 rmid, u32 closid, u32 cntr_id, bool assign) { @@ -1170,7 +1170,7 @@ static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain * * Return: * 0 on success, < 0 on failure. */ -static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, struct mon_evt *mevt) { int cntr_id; @@ -1205,7 +1205,7 @@ static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_mon_dom * Return: * 0 on success, < 0 on failure. 
*/ -static int rdtgroup_assign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, +static int rdtgroup_assign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, struct mon_evt *mevt) { struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid); @@ -1255,7 +1255,7 @@ void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp) * rdtgroup_free_unassign_cntr() - Unassign and reset the counter ID configuration * for the event pointed to by @mevt within the domain @d and resctrl group @rdtgrp. */ -static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, struct mon_evt *mevt) { int cntr_id; @@ -1276,7 +1276,7 @@ static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_mon_d * the event structure @mevt from the domain @d and the group @rdtgrp. Unassign * the counters from all the domains if @d is NULL else unassign from @d. */ -static void rdtgroup_unassign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, +static void rdtgroup_unassign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, struct mon_evt *mevt) { struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid); @@ -1351,7 +1351,7 @@ next_config: static void rdtgroup_update_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; int cntr_id; list_for_each_entry(d, &r->mon_domains, hdr.list) { @@ -1457,7 +1457,7 @@ ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct rdt_resource *r = rdt_kn_parent_priv(of->kn); - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; int ret = 0; bool enable; @@ -1530,7 +1530,7 @@ int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { struct rdt_resource *r = rdt_kn_parent_priv(of->kn); - struct rdt_mon_domain *dom; + struct rdt_l3_mon_domain *dom; bool sep = false; cpus_read_lock(); @@ -1554,7 +1554,7 @@ int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { struct rdt_resource *r = rdt_kn_parent_priv(of->kn); - struct rdt_mon_domain *dom; + struct rdt_l3_mon_domain *dom; bool sep = false; u32 cntrs, i; int ret = 0; @@ -1595,7 +1595,7 @@ out_unlock: int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; struct rdtgroup *rdtgrp; struct mon_evt *mevt; int ret = 0; @@ -1658,7 +1658,7 @@ static struct mon_evt *mbm_get_mon_event_by_name(struct rdt_resource *r, char *n return NULL; } -static int rdtgroup_modify_assign_state(char *assign, struct rdt_mon_domain *d, +static int rdtgroup_modify_assign_state(char *assign, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, struct mon_evt *mevt) { int ret = 0; @@ -1684,7 +1684,7 @@ static int rdtgroup_modify_assign_state(char *assign, struct rdt_mon_domain *d, static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup *rdtgrp, char *event, char *tok) { - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; unsigned long dom_id = 0; char *dom_str, *id_str; struct mon_evt *mevt; diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 89ffe54fb0fc..2ed435db1923 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ 
-1640,7 +1640,7 @@ static void mondata_config_read(struct resctrl_mon_config_info *mon_info) static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) { struct resctrl_mon_config_info mon_info; - struct rdt_mon_domain *dom; + struct rdt_l3_mon_domain *dom; bool sep = false; cpus_read_lock(); @@ -1688,7 +1688,7 @@ static int mbm_local_bytes_config_show(struct kernfs_open_file *of, } static void mbm_config_write_domain(struct rdt_resource *r, - struct rdt_mon_domain *d, u32 evtid, u32 val) + struct rdt_l3_mon_domain *d, u32 evtid, u32 val) { struct resctrl_mon_config_info mon_info = {0}; @@ -1729,8 +1729,8 @@ static void mbm_config_write_domain(struct rdt_resource *r, static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) { char *dom_str = NULL, *id_str; + struct rdt_l3_mon_domain *d; unsigned long dom_id, val; - struct rdt_mon_domain *d; /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); @@ -2781,7 +2781,7 @@ static int rdt_get_tree(struct fs_context *fc) { struct rdt_fs_context *ctx = rdt_fc2context(fc); unsigned long flags = RFTYPE_CTRL_BASE; - struct rdt_mon_domain *dom; + struct rdt_l3_mon_domain *dom; struct rdt_resource *r; int ret; @@ -3232,7 +3232,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, struct rdt_domain_hdr *hdr) { struct rdtgroup *prgrp, *crgrp; - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; char subname[32]; bool snc_mode; char name[32]; @@ -3240,7 +3240,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) return; - d = container_of(hdr, struct rdt_mon_domain, hdr); + d = container_of(hdr, struct rdt_l3_mon_domain, hdr); snc_mode = r->mon_scope == RESCTRL_L3_NODE; sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : hdr->id); if (snc_mode) @@ -3258,8 +3258,8 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_domain_hdr *hdr, struct rdt_resource *r, struct rdtgroup *prgrp, bool do_sum) { + struct rdt_l3_mon_domain *d; struct rmid_read rr = {0}; - struct rdt_mon_domain *d; struct mon_data *priv; struct mon_evt *mevt; int ret, domid; @@ -3267,7 +3267,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_domain_hdr *hdr, if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) return -EINVAL; - d = container_of(hdr, struct rdt_mon_domain, hdr); + d = container_of(hdr, struct rdt_l3_mon_domain, hdr); for_each_mon_event(mevt) { if (mevt->rid != r->rid || !mevt->enabled) continue; @@ -3292,7 +3292,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, struct rdt_resource *r, struct rdtgroup *prgrp) { struct kernfs_node *kn, *ckn; - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; char name[32]; bool snc_mode; int ret = 0; @@ -3302,7 +3302,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) return -EINVAL; - d = container_of(hdr, struct rdt_mon_domain, hdr); + d = container_of(hdr, struct rdt_l3_mon_domain, hdr); snc_mode = r->mon_scope == RESCTRL_L3_NODE; sprintf(name, "mon_%s_%02d", r->name, snc_mode ? 
d->ci_id : d->hdr.id); kn = kernfs_find_and_get(parent_kn, name); @@ -4246,7 +4246,7 @@ static void rdtgroup_setup_default(void) mutex_unlock(&rdtgroup_mutex); } -static void domain_destroy_mon_state(struct rdt_mon_domain *d) +static void domain_destroy_mon_state(struct rdt_l3_mon_domain *d) { int idx; @@ -4270,14 +4270,14 @@ void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr) { - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; mutex_lock(&rdtgroup_mutex); if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) goto out_unlock; - d = container_of(hdr, struct rdt_mon_domain, hdr); + d = container_of(hdr, struct rdt_l3_mon_domain, hdr); /* * If resctrl is mounted, remove all the @@ -4319,7 +4319,7 @@ out_unlock: * * Returns 0 for success, or -ENOMEM. */ -static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d) +static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_l3_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); size_t tsize = sizeof(*d->mbm_states[0]); @@ -4377,7 +4377,7 @@ int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr) { - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; int err = -EINVAL; mutex_lock(&rdtgroup_mutex); @@ -4385,7 +4385,7 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) goto out_unlock; - d = container_of(hdr, struct rdt_mon_domain, hdr); + d = container_of(hdr, struct rdt_l3_mon_domain, hdr); err = domain_setup_mon_state(r, d); if (err) goto out_unlock; @@ -4432,10 +4432,10 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) } } -static struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, - struct rdt_resource *r) +static struct rdt_l3_mon_domain *get_mon_domain_from_cpu(int cpu, + struct rdt_resource *r) { - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; lockdep_assert_cpus_held(); @@ -4451,7 +4451,7 @@ static struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, void resctrl_offline_cpu(unsigned int cpu) { struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; struct rdtgroup *rdtgrp; mutex_lock(&rdtgroup_mutex); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 9b9877fb3238..79aaaabcdd3f 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -178,7 +178,7 @@ struct mbm_cntr_cfg { }; /** - * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource + * struct rdt_l3_mon_domain - group of CPUs sharing RDT_RESOURCE_L3 monitoring * @hdr: common header for different domain types * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold @@ -192,7 +192,7 @@ struct mbm_cntr_cfg { * @cntr_cfg: array of assignable counters' configuration (indexed * by counter ID) */ -struct rdt_mon_domain { +struct rdt_l3_mon_domain { struct rdt_domain_hdr hdr; unsigned int ci_id; unsigned long *rmid_busy_llc; @@ -367,10 +367,10 @@ struct resctrl_cpu_defaults { }; struct resctrl_mon_config_info { - struct rdt_resource *r; - struct rdt_mon_domain *d; - u32 evtid; - u32 mon_config; + struct rdt_resource *r; + struct rdt_l3_mon_domain *d; + u32 evtid; + u32 mon_config; }; /** @@ 
-585,7 +585,7 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id, * * This can be called from any CPU. */ -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id eventid); @@ -598,7 +598,7 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, * * This can be called from any CPU. */ -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d); +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d); /** * resctrl_arch_reset_all_ctrls() - Reset the control for each CLOSID to its @@ -624,7 +624,7 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r); * * This can be called from any CPU. */ -void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, enum resctrl_event_id evtid, u32 rmid, u32 closid, u32 cntr_id, bool assign); @@ -647,7 +647,7 @@ void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, * Return: * 0 on success, or -EIO, -EINVAL etc on error. */ -int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, int cntr_id, enum resctrl_event_id eventid, u64 *val); @@ -662,7 +662,7 @@ int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, * * This can be called from any CPU. */ -void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, int cntr_id, enum resctrl_event_id eventid); -- cgit v1.2.3 From ab0308aee3819a3eccde42f9eb5bb01d6733be38 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Dec 2025 09:20:58 -0800 Subject: x86,fs/resctrl: Handle events that can be read from any CPU resctrl assumes that monitor events can only be read from a CPU in the cpumask_t set of each domain. This is true for x86 events accessed with an MSR interface, but may not be true for other access methods such as MMIO. Introduce and use a flag, mon_evt::any_cpu, settable by the architecture, that indicates there are no restrictions on which CPU can read that event. This flag is not supported by the L3 event reading code, which must run on a CPU that belongs to the L3 domain of the event being read. 
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/20251217172121.12030-1-tony.luck@intel.com --- arch/x86/kernel/cpu/resctrl/core.c | 6 +++--- fs/resctrl/ctrlmondata.c | 6 ++++++ fs/resctrl/internal.h | 2 ++ fs/resctrl/monitor.c | 4 +++- include/linux/resctrl.h | 2 +- 5 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index b3a2dc56155d..bd4a98106153 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -902,15 +902,15 @@ static __init bool get_rdt_mon_resources(void) bool ret = false; if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) { - resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID); + resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false); ret = true; } if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) { - resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID); + resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID, false); ret = true; } if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) { - resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); + resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID, false); ret = true; } if (rdt_cpu_has(X86_FEATURE_ABMC)) diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 7f9b2fed117a..2c69fcd70eeb 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -578,6 +578,11 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, } } + if (evt->any_cpu) { + mon_event_count(rr); + goto out_ctx_free; + } + cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU); /* @@ -591,6 +596,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, else smp_call_on_cpu(cpu, smp_mon_event_count, rr, false); +out_ctx_free: if (rr->arch_mon_ctx) resctrl_arch_mon_ctx_free(r, evt->evtid, rr->arch_mon_ctx); } diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 86cf38ab08a7..fb0b6e40d022 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -61,6 +61,7 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) * READS_TO_REMOTE_MEM) being tracked by @evtid. * Only valid if @evtid is an MBM event. 
* @configurable: true if the event is configurable + * @any_cpu: true if the event can be read from any CPU * @enabled: true if the event is enabled */ struct mon_evt { @@ -69,6 +70,7 @@ struct mon_evt { char *name; u32 evt_cfg; bool configurable; + bool any_cpu; bool enabled; }; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 340b847ab397..8c76ac133bca 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -518,6 +518,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) { switch (rr->r->rid) { case RDT_RESOURCE_L3: + WARN_ON_ONCE(rr->evt->any_cpu); if (rr->hdr) return __l3_mon_event_count(rdtgrp, rr); else @@ -987,7 +988,7 @@ struct mon_evt mon_event_all[QOS_NUM_EVENTS] = { }, }; -void resctrl_enable_mon_event(enum resctrl_event_id eventid) +void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu) { if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS)) return; @@ -996,6 +997,7 @@ void resctrl_enable_mon_event(enum resctrl_event_id eventid) return; } + mon_event_all[eventid].any_cpu = any_cpu; mon_event_all[eventid].enabled = true; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 79aaaabcdd3f..22c5d07fe9ff 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -412,7 +412,7 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); -void resctrl_enable_mon_event(enum resctrl_event_id eventid); +void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu); bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid); -- cgit v1.2.3 From e37c9a3dc9f9645532780d5ef34ea3b8fcf9ddef Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Dec 2025 09:20:59 -0800 Subject: x86,fs/resctrl: Support binary fixed point event counters resctrl assumes that all monitor events can be displayed as unsigned decimal integers. Hardware counters may provide some telemetry events with greater precision where the event is not a simple count, but is a measurement of some sort (e.g. Joules for energy consumed). Add a new argument to resctrl_enable_mon_event() for architecture code to inform the file system that the value for a counter is a fixed-point value with a specific number of binary places. Only allow the architecture to use floating point format on events that the file system has marked with mon_evt::is_floating_point, which reflects the contract with user space on how the event values are displayed. Display fixed-point values rounded to ceil(binary_bits * log10(2)) decimal places. Special-case zero binary bits to print "{value}.0".
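Worked example: with 18 fractional bits (the width the energy events later in this series use), ceil(18 * log10(2)) = 6 decimal places, so the 18-bit fixed-point encoding of 4.5 prints as "4.500000". A stand-alone user-space sketch of the same mask/scale/round/shift sequence, hardcoded for 18 bits instead of using the lookup table:

	#include <stdio.h>
	#include <stdint.h>

	#define BINARY_BITS	18
	#define DEC_PLACES	6		/* ceil(18 * log10(2)) */
	#define POW10		1000000ull	/* 10^DEC_PLACES */

	static void print_fixed_point(uint64_t val)
	{
		uint64_t frac = val & ((1ull << BINARY_BITS) - 1);

		frac *= POW10;				/* scale fraction to decimal */
		frac += 1ull << (BINARY_BITS - 1);	/* round to nearest */
		frac >>= BINARY_BITS;			/* drop the fraction bits */
		printf("%llu.%0*llu\n", (unsigned long long)(val >> BINARY_BITS),
		       DEC_PLACES, (unsigned long long)frac);
	}

	int main(void)
	{
		/* 4.5 in 18-bit fixed point: (4 << 18) | (1 << 17) */
		print_fixed_point((4ull << 18) | (1ull << 17));	/* 4.500000 */
		return 0;
	}

The rounding term can never carry into the integer part because 10^DEC_PLACES >= 2^BINARY_BITS by construction of the table.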
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/20251217172121.12030-1-tony.luck@intel.com --- arch/x86/kernel/cpu/resctrl/core.c | 6 ++-- fs/resctrl/ctrlmondata.c | 74 ++++++++++++++++++++++++++++++++++++++ fs/resctrl/internal.h | 8 +++++ fs/resctrl/monitor.c | 10 ++++-- include/linux/resctrl.h | 3 +- 5 files changed, 95 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index bd4a98106153..9222eee7ce07 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -902,15 +902,15 @@ static __init bool get_rdt_mon_resources(void) bool ret = false; if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) { - resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false); + resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false, 0); ret = true; } if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) { - resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID, false); + resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID, false, 0); ret = true; } if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) { - resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID, false); + resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID, false, 0); ret = true; } if (rdt_cpu_has(X86_FEATURE_ABMC)) diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 2c69fcd70eeb..f319fd1a6de3 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -601,6 +602,77 @@ out_ctx_free: resctrl_arch_mon_ctx_free(r, evt->evtid, rr->arch_mon_ctx); } +/* + * Decimal place precision to use for each number of fixed-point + * binary bits computed from ceil(binary_bits * log10(2)) except + * binary_bits == 0 which will print "value.0" + */ +static const unsigned int decplaces[MAX_BINARY_BITS + 1] = { + [0] = 1, + [1] = 1, + [2] = 1, + [3] = 1, + [4] = 2, + [5] = 2, + [6] = 2, + [7] = 3, + [8] = 3, + [9] = 3, + [10] = 4, + [11] = 4, + [12] = 4, + [13] = 4, + [14] = 5, + [15] = 5, + [16] = 5, + [17] = 6, + [18] = 6, + [19] = 6, + [20] = 7, + [21] = 7, + [22] = 7, + [23] = 7, + [24] = 8, + [25] = 8, + [26] = 8, + [27] = 9 +}; + +static void print_event_value(struct seq_file *m, unsigned int binary_bits, u64 val) +{ + unsigned long long frac = 0; + + if (binary_bits) { + /* Mask off the integer part of the fixed-point value. */ + frac = val & GENMASK_ULL(binary_bits - 1, 0); + + /* + * Multiply by 10^{desired decimal places}. The integer part of + * the fixed point value is now almost what is needed. + */ + frac *= int_pow(10ull, decplaces[binary_bits]); + + /* + * Round to nearest by adding a value that would be a "1" in the + * binary_bits + 1 place. Integer part of fixed point value is + * now the needed value. + */ + frac += 1ull << (binary_bits - 1); + + /* + * Extract the integer part of the value. This is the decimal + * representation of the original fixed-point fractional value. + */ + frac >>= binary_bits; + } + + /* + * "frac" is now in the range [0 .. 10^decplaces). I.e. string + * representation will fit into chosen number of decimal places. 
+ */ + seq_printf(m, "%llu.%0*llu\n", val >> binary_bits, decplaces[binary_bits], frac); +} + int rdtgroup_mondata_show(struct seq_file *m, void *arg) { struct kernfs_open_file *of = m->private; @@ -678,6 +750,8 @@ checkresult: seq_puts(m, "Unavailable\n"); else if (rr.err == -ENOENT) seq_puts(m, "Unassigned\n"); + else if (evt->is_floating_point) + print_event_value(m, evt->binary_bits, rr.val); else seq_printf(m, "%llu\n", rr.val); diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index fb0b6e40d022..14e5a9ed1fbd 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -62,6 +62,9 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) * Only valid if @evtid is an MBM event. * @configurable: true if the event is configurable * @any_cpu: true if the event can be read from any CPU + * @is_floating_point: event values are displayed in floating point format + * @binary_bits: number of fixed-point binary bits from architecture, + * only valid if @is_floating_point is true * @enabled: true if the event is enabled */ struct mon_evt { @@ -71,6 +74,8 @@ struct mon_evt { u32 evt_cfg; bool configurable; bool any_cpu; + bool is_floating_point; + unsigned int binary_bits; bool enabled; }; @@ -79,6 +84,9 @@ extern struct mon_evt mon_event_all[QOS_NUM_EVENTS]; #define for_each_mon_event(mevt) for (mevt = &mon_event_all[QOS_FIRST_EVENT]; \ mevt < &mon_event_all[QOS_NUM_EVENTS]; mevt++) +/* Limit for mon_evt::binary_bits */ +#define MAX_BINARY_BITS 27 + /** * struct mon_data - Monitoring details for each event file. * @list: Member of the global @mon_data_kn_priv_list list. diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 8c76ac133bca..844cf6875f60 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -988,16 +988,22 @@ struct mon_evt mon_event_all[QOS_NUM_EVENTS] = { }, }; -void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu) +void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, unsigned int binary_bits) { - if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS)) + if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS || + binary_bits > MAX_BINARY_BITS)) return; if (mon_event_all[eventid].enabled) { pr_warn("Duplicate enable for event %d\n", eventid); return; } + if (binary_bits && !mon_event_all[eventid].is_floating_point) { + pr_warn("Event %d may not be floating point\n", eventid); + return; + } mon_event_all[eventid].any_cpu = any_cpu; + mon_event_all[eventid].binary_bits = binary_bits; mon_event_all[eventid].enabled = true; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 22c5d07fe9ff..c43526cdf304 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -412,7 +412,8 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); -void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu); +void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, + unsigned int binary_bits); bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid); -- cgit v1.2.3 From 39208e73a40e0e81a5b12ddc11157c0a414df307 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 8 Jan 2026 09:42:25 -0800 Subject: x86,fs/resctrl: Add an architectural hook called for first mount Enumeration of Intel telemetry events is an asynchronous process involving several mutually dependent drivers added as auxiliary 
devices during the device_initcall() phase of Linux boot. The process finishes after the probe functions of these drivers complete, but this happens after resctrl_arch_late_init() is executed. Tracing the enumeration process shows that it does complete a full seven seconds before the earliest possible mount of the resctrl file system (when included in /etc/fstab for automatic mount by systemd). Add a hook for use by telemetry event enumeration and initialization and run it once at the beginning of resctrl mount without any locks held. The architecture is responsible for any required locking. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20260105191711.GBaVwON5nZn-uO6Sqg@fat_crate.local --- arch/x86/kernel/cpu/resctrl/core.c | 4 ++++ fs/resctrl/rdtgroup.c | 3 +++ include/linux/resctrl.h | 6 ++++++ 3 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 9222eee7ce07..a2b7f869b4b0 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -726,6 +726,10 @@ static int resctrl_arch_offline_cpu(unsigned int cpu) return 0; } +void resctrl_arch_pre_mount(void) +{ +} + enum { RDT_FLAG_CMT, RDT_FLAG_MBM_TOTAL, diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 771e40f02ba6..0e3b8bcf4e53 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -2785,6 +2786,8 @@ static int rdt_get_tree(struct fs_context *fc) struct rdt_resource *r; int ret; + DO_ONCE_SLEEPABLE(resctrl_arch_pre_mount); + cpus_read_lock(); mutex_lock(&rdtgroup_mutex); /* diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index c43526cdf304..2f938a5a16f8 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -514,6 +514,12 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *h void resctrl_online_cpu(unsigned int cpu); void resctrl_offline_cpu(unsigned int cpu); +/* + * Architecture hook called at beginning of first file system mount attempt. + * No locks are held. + */ +void resctrl_arch_pre_mount(void); + /** * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid * for this resource and domain. -- cgit v1.2.3 From 2e53ad66686a46b141c3395719afeee3057ffe2f Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Dec 2025 09:21:01 -0800 Subject: x86,fs/resctrl: Add and initialize a resource for package scope monitoring Add a new PERF_PKG resource and introduce package level scope for monitoring telemetry events so that CPU hotplug notifiers can build domains at the package granularity. Use the physical package ID available via topology_physical_package_id() to identify the monitoring domains with package level scope. This enables user space to use: /sys/devices/system/cpu/cpuX/topology/physical_package_id to identify the monitoring domain a CPU is associated with.
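For example, a small user-space helper (hypothetical, not part of the patch) can map a CPU to its PERF_PKG monitoring domain through that file:

	#include <stdio.h>

	/* Return the package id for @cpu, or -1 on error. */
	static int cpu_to_perf_pkg_domain(int cpu)
	{
		char path[96];
		int id = -1;
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
			 cpu);
		f = fopen(path, "r");
		if (!f)
			return -1;
		if (fscanf(f, "%d", &id) != 1)
			id = -1;
		fclose(f);
		return id;
	}

	int main(void)
	{
		printf("CPU0 -> PERF_PKG domain %d\n", cpu_to_perf_pkg_domain(0));
		return 0;
	}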
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/20251217172121.12030-1-tony.luck@intel.com --- arch/x86/kernel/cpu/resctrl/core.c | 10 ++++++++++ fs/resctrl/internal.h | 2 ++ fs/resctrl/rdtgroup.c | 2 ++ include/linux/resctrl.h | 2 ++ 4 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index a2b7f869b4b0..f3d7e2263630 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -100,6 +100,14 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .schema_fmt = RESCTRL_SCHEMA_RANGE, }, }, + [RDT_RESOURCE_PERF_PKG] = + { + .r_resctrl = { + .name = "PERF_PKG", + .mon_scope = RESCTRL_PACKAGE, + .mon_domains = mon_domain_init(RDT_RESOURCE_PERF_PKG), + }, + }, }; u32 resctrl_arch_system_num_rmid_idx(void) @@ -440,6 +448,8 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope) return get_cpu_cacheinfo_id(cpu, scope); case RESCTRL_L3_NODE: return cpu_to_node(cpu); + case RESCTRL_PACKAGE: + return topology_physical_package_id(cpu); default: break; } diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 14e5a9ed1fbd..0110d1175398 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -255,6 +255,8 @@ struct rdtgroup { #define RFTYPE_ASSIGN_CONFIG BIT(11) +#define RFTYPE_RES_PERF_PKG BIT(12) + #define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) #define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 0e3b8bcf4e53..a06cefd2a09e 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -2396,6 +2396,8 @@ static unsigned long fflags_from_resource(struct rdt_resource *r) case RDT_RESOURCE_MBA: case RDT_RESOURCE_SMBA: return RFTYPE_RES_MB; + case RDT_RESOURCE_PERF_PKG: + return RFTYPE_RES_PERF_PKG; } return WARN_ON_ONCE(1); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 2f938a5a16f8..861e63e868a1 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -53,6 +53,7 @@ enum resctrl_res_level { RDT_RESOURCE_L2, RDT_RESOURCE_MBA, RDT_RESOURCE_SMBA, + RDT_RESOURCE_PERF_PKG, /* Must be the last */ RDT_NUM_RESOURCES, @@ -270,6 +271,7 @@ enum resctrl_scope { RESCTRL_L2_CACHE = 2, RESCTRL_L3_CACHE = 3, RESCTRL_L3_NODE, + RESCTRL_PACKAGE, }; /** -- cgit v1.2.3 From 8f6b6ad69b50bf16bb762ffafbfa44a4884f9a17 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Dec 2025 09:21:04 -0800 Subject: x86,fs/resctrl: Fill in details of events for performance and energy GUIDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The telemetry event aggregators of the Intel Clearwater Forest CPU support two RMID-based feature types: "energy" with GUID 0x26696143¹, and "perf" with GUID 0x26557651². The event counter offsets in an aggregator's MMIO space are arranged in groups for each RMID. E.g., the "energy" counters for GUID 0x26696143 are arranged like this:
MMIO offset:0x0000 Counter for RMID 0 PMT_EVENT_ENERGY
MMIO offset:0x0008 Counter for RMID 0 PMT_EVENT_ACTIVITY
MMIO offset:0x0010 Counter for RMID 1 PMT_EVENT_ENERGY
MMIO offset:0x0018 Counter for RMID 1 PMT_EVENT_ACTIVITY
...
MMIO offset:0x23F0 Counter for RMID 575 PMT_EVENT_ENERGY
MMIO offset:0x23F8 Counter for RMID 575 PMT_EVENT_ACTIVITY
After all the counters come three status registers that report how many times the aggregator was unable to process event counts, the time stamp of the most recent loss of data, and the time stamp of the most recent successful update:
MMIO offset:0x2400 AGG_DATA_LOSS_COUNT
MMIO offset:0x2408 AGG_DATA_LOSS_TIMESTAMP
MMIO offset:0x2410 LAST_UPDATE_TIMESTAMP
Define event_group structures for both of these aggregator types and define the events tracked by the aggregators in the file system code. PMT_EVENT_ENERGY and PMT_EVENT_ACTIVITY are produced in fixed point format. File system code must output them as floating point values. ¹https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-ENERGY/cwf_aggregator.xml ²https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-PERF/cwf_aggregator.xml [ bp: Massage commit message. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/20251217172121.12030-1-tony.luck@intel.com --- arch/x86/kernel/cpu/resctrl/intel_aet.c | 66 +++++++++++++++++++++++++++++++++ fs/resctrl/monitor.c | 35 +++++++++-------- include/linux/resctrl_types.h | 11 ++++++ 3 files changed, 97 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c index 404564739bef..8e042b530c91 100644 --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c @@ -11,15 +11,33 @@ #define pr_fmt(fmt) "resctrl: " fmt +#include #include #include #include #include #include +#include #include +#include #include "internal.h" +/** + * struct pmt_event - Telemetry event. + * @id: Resctrl event id. + * @idx: Counter index within each per-RMID block of counters. + * @bin_bits: Zero for integer valued events, else number of bits in the + * fraction part of the fixed-point value. + */ +struct pmt_event { + enum resctrl_event_id id; + unsigned int idx; + unsigned int bin_bits; +}; + +#define EVT(_id, _idx, _bits) { .id = _id, .idx = _idx, .bin_bits = _bits } + /** * struct event_group - Events with the same feature type ("energy" or "perf") and GUID. * @pfname: PMT feature name ("energy" or "perf") of this event group. @@ -29,14 +47,62 @@ * data for all telemetry regions of type @pfname. * Valid if the system supports the event group, * NULL otherwise. + * @guid: Unique number per XML description file. + * @mmio_size: Number of bytes of MMIO registers for this group. + * @num_events: Number of events in this group. + * @evts: Array of event descriptors. */ struct event_group { /* Data fields for additional structures to manage this group. */ const char *pfname; struct pmt_feature_group *pfg; + + /* Remaining fields initialized from XML file.
*/ + u32 guid; + size_t mmio_size; + unsigned int num_events; + struct pmt_event evts[] __counted_by(num_events); +}; + +#define XML_MMIO_SIZE(num_rmids, num_events, num_extra_status) \ + (((num_rmids) * (num_events) + (num_extra_status)) * sizeof(u64)) + +/* + * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-ENERGY/cwf_aggregator.xml + */ +static struct event_group energy_0x26696143 = { + .pfname = "energy", + .guid = 0x26696143, + .mmio_size = XML_MMIO_SIZE(576, 2, 3), + .num_events = 2, + .evts = { + EVT(PMT_EVENT_ENERGY, 0, 18), + EVT(PMT_EVENT_ACTIVITY, 1, 18), + } +}; + +/* + * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-PERF/cwf_aggregator.xml + */ +static struct event_group perf_0x26557651 = { + .pfname = "perf", + .guid = 0x26557651, + .mmio_size = XML_MMIO_SIZE(576, 7, 3), + .num_events = 7, + .evts = { + EVT(PMT_EVENT_STALLS_LLC_HIT, 0, 0), + EVT(PMT_EVENT_C1_RES, 1, 0), + EVT(PMT_EVENT_UNHALTED_CORE_CYCLES, 2, 0), + EVT(PMT_EVENT_STALLS_LLC_MISS, 3, 0), + EVT(PMT_EVENT_AUTO_C6_RES, 4, 0), + EVT(PMT_EVENT_UNHALTED_REF_CYCLES, 5, 0), + EVT(PMT_EVENT_UOPS_RETIRED, 6, 0), + } }; static struct event_group *known_event_groups[] = { + &energy_0x26696143, + &perf_0x26557651, }; #define for_each_event_group(_peg) \ diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 844cf6875f60..9729acacdc19 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -965,27 +965,32 @@ out_unlock: mutex_unlock(&rdtgroup_mutex); } +#define MON_EVENT(_eventid, _name, _res, _fp) \ + [_eventid] = { \ + .name = _name, \ + .evtid = _eventid, \ + .rid = _res, \ + .is_floating_point = _fp, \ +} + /* * All available events. Architecture code marks the ones that * are supported by a system using resctrl_enable_mon_event() * to set .enabled. 
*/ struct mon_evt mon_event_all[QOS_NUM_EVENTS] = { - [QOS_L3_OCCUP_EVENT_ID] = { - .name = "llc_occupancy", - .evtid = QOS_L3_OCCUP_EVENT_ID, - .rid = RDT_RESOURCE_L3, - }, - [QOS_L3_MBM_TOTAL_EVENT_ID] = { - .name = "mbm_total_bytes", - .evtid = QOS_L3_MBM_TOTAL_EVENT_ID, - .rid = RDT_RESOURCE_L3, - }, - [QOS_L3_MBM_LOCAL_EVENT_ID] = { - .name = "mbm_local_bytes", - .evtid = QOS_L3_MBM_LOCAL_EVENT_ID, - .rid = RDT_RESOURCE_L3, - }, + MON_EVENT(QOS_L3_OCCUP_EVENT_ID, "llc_occupancy", RDT_RESOURCE_L3, false), + MON_EVENT(QOS_L3_MBM_TOTAL_EVENT_ID, "mbm_total_bytes", RDT_RESOURCE_L3, false), + MON_EVENT(QOS_L3_MBM_LOCAL_EVENT_ID, "mbm_local_bytes", RDT_RESOURCE_L3, false), + MON_EVENT(PMT_EVENT_ENERGY, "core_energy", RDT_RESOURCE_PERF_PKG, true), + MON_EVENT(PMT_EVENT_ACTIVITY, "activity", RDT_RESOURCE_PERF_PKG, true), + MON_EVENT(PMT_EVENT_STALLS_LLC_HIT, "stalls_llc_hit", RDT_RESOURCE_PERF_PKG, false), + MON_EVENT(PMT_EVENT_C1_RES, "c1_res", RDT_RESOURCE_PERF_PKG, false), + MON_EVENT(PMT_EVENT_UNHALTED_CORE_CYCLES, "unhalted_core_cycles", RDT_RESOURCE_PERF_PKG, false), + MON_EVENT(PMT_EVENT_STALLS_LLC_MISS, "stalls_llc_miss", RDT_RESOURCE_PERF_PKG, false), + MON_EVENT(PMT_EVENT_AUTO_C6_RES, "c6_res", RDT_RESOURCE_PERF_PKG, false), + MON_EVENT(PMT_EVENT_UNHALTED_REF_CYCLES, "unhalted_ref_cycles", RDT_RESOURCE_PERF_PKG, false), + MON_EVENT(PMT_EVENT_UOPS_RETIRED, "uops_retired", RDT_RESOURCE_PERF_PKG, false), }; void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, unsigned int binary_bits) diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index acfe07860b34..a5f56faa18d2 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -50,6 +50,17 @@ enum resctrl_event_id { QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, + /* Intel Telemetry Events */ + PMT_EVENT_ENERGY, + PMT_EVENT_ACTIVITY, + PMT_EVENT_STALLS_LLC_HIT, + PMT_EVENT_C1_RES, + PMT_EVENT_UNHALTED_CORE_CYCLES, + PMT_EVENT_STALLS_LLC_MISS, + PMT_EVENT_AUTO_C6_RES, + PMT_EVENT_UNHALTED_REF_CYCLES, + PMT_EVENT_UOPS_RETIRED, + /* Must be the last */ QOS_NUM_EVENTS, }; -- cgit v1.2.3 From 8ccb1f8fa6a3dfde32cf33e7ded3558014e6cca2 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Dec 2025 09:21:05 -0800 Subject: x86,fs/resctrl: Add architectural event pointer The resctrl file system layer passes the domain, RMID, and event id to the architecture to fetch an event counter. Fetching a telemetry event counter requires additional information that is private to the architecture, for example, the offset into MMIO space from where the counter should be read. Add mon_evt::arch_priv that the architecture can use for any private data related to the event. The resctrl file system initializes mon_evt::arch_priv when the architecture enables the event and passes it back to the architecture when it needs to fetch an event counter.
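A sketch of the round trip (struct my_arch_event, its mmio_offset field, and my_arch_setup() are invented stand-ins; the x86 code later in this series passes a pointer to its struct pmt_event descriptor the same way):

	/* Invented stand-in for whatever the architecture wants to stash. */
	struct my_arch_event {
		unsigned long mmio_offset;	/* per-event counter offset */
	};

	static struct my_arch_event my_energy = { .mmio_offset = 0x0 };

	static void my_arch_setup(void)
	{
		/* The file system stores the pointer in mon_evt::arch_priv... */
		resctrl_enable_mon_event(PMT_EVENT_ENERGY, true, 18, &my_energy);
	}

	int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr,
				   u32 closid, u32 rmid, enum resctrl_event_id eventid,
				   void *arch_priv, u64 *val, void *arch_mon_ctx)
	{
		/* ...and hands it back, unmodified, on every counter read. */
		struct my_arch_event *ev = arch_priv;

		/* Read the counter for @rmid at ev->mmio_offset into *val. */
		return 0;
	}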
Suggested-by: Reinette Chatre Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/20251217172121.12030-1-tony.luck@intel.com --- arch/x86/kernel/cpu/resctrl/core.c | 6 +++--- arch/x86/kernel/cpu/resctrl/monitor.c | 2 +- fs/resctrl/internal.h | 4 ++++ fs/resctrl/monitor.c | 14 ++++++++++---- include/linux/resctrl.h | 7 +++++-- 5 files changed, 23 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 595f7eae9294..509277b17b9e 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -918,15 +918,15 @@ static __init bool get_rdt_mon_resources(void) bool ret = false; if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) { - resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false, 0); + resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false, 0, NULL); ret = true; } if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) { - resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID, false, 0); + resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID, false, 0, NULL); ret = true; } if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) { - resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID, false, 0); + resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID, false, 0, NULL); ret = true; } if (rdt_cpu_has(X86_FEATURE_ABMC)) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 20605212656c..6929614ba6e6 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -240,7 +240,7 @@ static u64 get_corrected_val(struct rdt_resource *r, struct rdt_l3_mon_domain *d int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, u32 unused, u32 rmid, enum resctrl_event_id eventid, - u64 *val, void *ignored) + void *arch_priv, u64 *val, void *ignored) { struct rdt_hw_l3_mon_domain *hw_dom; struct rdt_l3_mon_domain *d; diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 50d88e91e0da..399f625be67d 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -66,6 +66,9 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) * @binary_bits: number of fixed-point binary bits from architecture, * only valid if @is_floating_point is true * @enabled: true if the event is enabled + * @arch_priv: Architecture private data for this event. + * The @arch_priv provided by the architecture via + * resctrl_enable_mon_event(). 
*/ struct mon_evt { enum resctrl_event_id evtid; @@ -77,6 +80,7 @@ struct mon_evt { bool is_floating_point; unsigned int binary_bits; bool enabled; + void *arch_priv; }; extern struct mon_evt mon_event_all[QOS_NUM_EVENTS]; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 9729acacdc19..af43a33ce4cb 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -137,9 +137,11 @@ void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free) struct rmid_entry *entry; u32 idx, cur_idx = 1; void *arch_mon_ctx; + void *arch_priv; bool rmid_dirty; u64 val = 0; + arch_priv = mon_event_all[QOS_L3_OCCUP_EVENT_ID].arch_priv; arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID); if (IS_ERR(arch_mon_ctx)) { pr_warn_ratelimited("Failed to allocate monitor context: %ld", @@ -160,7 +162,7 @@ void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free) entry = __rmid_entry(idx); if (resctrl_arch_rmid_read(r, &d->hdr, entry->closid, entry->rmid, - QOS_L3_OCCUP_EVENT_ID, &val, + QOS_L3_OCCUP_EVENT_ID, arch_priv, &val, arch_mon_ctx)) { rmid_dirty = true; } else { @@ -456,7 +458,8 @@ static int __l3_mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) rr->evt->evtid, &tval); else rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, closid, rmid, - rr->evt->evtid, &tval, rr->arch_mon_ctx); + rr->evt->evtid, rr->evt->arch_priv, + &tval, rr->arch_mon_ctx); if (rr->err) return rr->err; @@ -501,7 +504,8 @@ static int __l3_mon_event_count_sum(struct rdtgroup *rdtgrp, struct rmid_read *r if (d->ci_id != rr->ci->id) continue; err = resctrl_arch_rmid_read(rr->r, &d->hdr, closid, rmid, - rr->evt->evtid, &tval, rr->arch_mon_ctx); + rr->evt->evtid, rr->evt->arch_priv, + &tval, rr->arch_mon_ctx); if (!err) { rr->val += tval; ret = 0; @@ -993,7 +997,8 @@ struct mon_evt mon_event_all[QOS_NUM_EVENTS] = { MON_EVENT(PMT_EVENT_UOPS_RETIRED, "uops_retired", RDT_RESOURCE_PERF_PKG, false), }; -void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, unsigned int binary_bits) +void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, + unsigned int binary_bits, void *arch_priv) { if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS || binary_bits > MAX_BINARY_BITS)) @@ -1009,6 +1014,7 @@ void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, unsig mon_event_all[eventid].any_cpu = any_cpu; mon_event_all[eventid].binary_bits = binary_bits; + mon_event_all[eventid].arch_priv = arch_priv; mon_event_all[eventid].enabled = true; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 861e63e868a1..2c64a43a36db 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -415,7 +415,7 @@ u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, - unsigned int binary_bits); + unsigned int binary_bits, void *arch_priv); bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid); @@ -532,6 +532,9 @@ void resctrl_arch_pre_mount(void); * only. * @rmid: rmid of the counter to read. * @eventid: eventid to read, e.g. L3 occupancy. + * @arch_priv: Architecture private data for this event. + * The @arch_priv provided by the architecture via + * resctrl_enable_mon_event(). * @val: result of the counter read in bytes. 
* @arch_mon_ctx: An architecture specific value from * resctrl_arch_mon_ctx_alloc(), for MPAM this identifies @@ -549,7 +552,7 @@ void resctrl_arch_pre_mount(void); */ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, u32 closid, u32 rmid, enum resctrl_event_id eventid, - u64 *val, void *arch_mon_ctx); + void *arch_priv, u64 *val, void *arch_mon_ctx); /** * resctrl_arch_rmid_read_context_check() - warn about invalid contexts -- cgit v1.2.3 From 7e6df9614546ae7eb1f1b2074d7b6039bb01540d Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Dec 2025 09:21:06 -0800 Subject: x86/resctrl: Find and enable usable telemetry events Every event group has a private copy of the data of all telemetry event aggregators (aka "telemetry regions") tracking its feature type. These may include regions that have the same feature type but a different GUID from the event group's. Traverse the event group's telemetry region data and mark all regions that are not usable by the event group as unusable by clearing those regions' MMIO addresses. A region is considered unusable if:
1) GUID does not match the GUID of the event group.
2) Package ID is invalid.
3) The enumerated size of the MMIO region does not match the expected value from the XML description file.
Hereafter any telemetry region with an MMIO address is considered valid for the event group it is associated with. Enable all the event group's events as long as there is at least one usable region from which data for its events can be read. Enabling of an event can fail if the same event has already been enabled as part of another event group. The same event should never be described by two different GUIDs supported on the same system, so just WARN (via resctrl_enable_mon_event()) and skip the event. Note that it is architecturally possible that some telemetry events are only supported by a subset of the packages in the system. It is not expected that systems will ever do this. If they do, the user will see event files in resctrl that always return "Unavailable". Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/20251217172121.12030-1-tony.luck@intel.com --- arch/x86/kernel/cpu/resctrl/intel_aet.c | 63 +++++++++++++++++++++++++++++++-- fs/resctrl/monitor.c | 10 +++--- include/linux/resctrl.h | 2 +- 3 files changed, 68 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c index 8e042b530c91..7d0bd7b070a7 100644 --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c @@ -16,9 +16,11 @@ #include #include #include +#include #include #include #include +#include #include #include "internal.h" @@ -110,12 +112,69 @@ static struct event_group *known_event_groups[] = { _peg < &known_event_groups[ARRAY_SIZE(known_event_groups)]; \ _peg++) -/* Stub for now */ -static bool enable_events(struct event_group *e, struct pmt_feature_group *p) +static bool skip_telem_region(struct telemetry_region *tr, struct event_group *e) { + if (tr->guid != e->guid) + return true; + if (tr->plat_info.package_id >= topology_max_packages()) { + pr_warn("Bad package %u in guid 0x%x\n", tr->plat_info.package_id, + tr->guid); + return true; + } + if (tr->size != e->mmio_size) { + pr_warn("MMIO space wrong size (%zu bytes) for guid 0x%x.
Expected %zu bytes.\n", + tr->size, e->guid, e->mmio_size); + return true; + } + return false; } +static bool group_has_usable_regions(struct event_group *e, struct pmt_feature_group *p) +{ + bool usable_regions = false; + + for (int i = 0; i < p->count; i++) { + if (skip_telem_region(&p->regions[i], e)) { + /* + * Clear the address field of regions that did not pass the checks in + * skip_telem_region() so they will not be used by intel_aet_read_event(). + * This is safe to do because intel_pmt_get_regions_by_feature() allocates + * a new pmt_feature_group structure to return to each caller and only makes + * use of the pmt_feature_group::kref field when intel_pmt_put_feature_group() + * returns the structure. + */ + p->regions[i].addr = NULL; + + continue; + } + usable_regions = true; + } + + return usable_regions; +} + +static bool enable_events(struct event_group *e, struct pmt_feature_group *p) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl; + int skipped_events = 0; + + if (!group_has_usable_regions(e, p)) + return false; + + for (int j = 0; j < e->num_events; j++) { + if (!resctrl_enable_mon_event(e->evts[j].id, true, + e->evts[j].bin_bits, &e->evts[j])) + skipped_events++; + } + if (e->num_events == skipped_events) { + pr_info("No events enabled in %s %s:0x%x\n", r->name, e->pfname, e->guid); + return false; + } + + return true; +} + static enum pmt_feature_id lookup_pfid(const char *pfname) { if (!strcmp(pfname, "energy")) diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index af43a33ce4cb..9af08b673e39 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -997,25 +997,27 @@ struct mon_evt mon_event_all[QOS_NUM_EVENTS] = { MON_EVENT(PMT_EVENT_UOPS_RETIRED, "uops_retired", RDT_RESOURCE_PERF_PKG, false), }; -void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, +bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, unsigned int binary_bits, void *arch_priv) { if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS || binary_bits > MAX_BINARY_BITS)) - return; + return false; if (mon_event_all[eventid].enabled) { pr_warn("Duplicate enable for event %d\n", eventid); - return; + return false; } if (binary_bits && !mon_event_all[eventid].is_floating_point) { pr_warn("Event %d may not be floating point\n", eventid); - return; + return false; } mon_event_all[eventid].any_cpu = any_cpu; mon_event_all[eventid].binary_bits = binary_bits; mon_event_all[eventid].arch_priv = arch_priv; mon_event_all[eventid].enabled = true; + + return true; } bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid) diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 2c64a43a36db..451eb45e44f8 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -414,7 +414,7 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); -void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, +bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, unsigned int binary_bits, void *arch_priv); bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid); -- cgit v1.2.3 From 67640e333b983298be624a41c43e3a8ed4713a73 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Dec 2025 09:21:12 -0800 Subject: x86/resctrl: Handle number of RMIDs supported by RDT_RESOURCE_PERF_PKG There are now three meanings for "number of RMIDs": 1) The 
number for legacy features enumerated by CPUID leaf 0xF. This is the maximum number of distinct values that can be loaded into MSR_IA32_PQR_ASSOC. Note that systems with Sub-NUMA Cluster mode enabled will force scaling down the CPUID enumerated value by the number of SNC nodes per L3-cache. 2) The number of registers in MMIO space for each event. This is enumerated in the XML files and is the value initialized into event_group::num_rmid. 3) The number of "hardware counters" (this isn't a strictly accurate description of how things work, but serves as a useful analogy that does describe the limitations) feeding to those MMIO registers. This is enumerated in telemetry_region::num_rmids returned by intel_pmt_get_regions_by_feature(). Event groups with insufficient "hardware counters" to track all RMIDs are difficult for users to use, since the system may reassign "hardware counters" at any time. This means that users cannot reliably collect two consecutive event counts to compute the rate at which events are occurring. Disable such event groups by default. The user may override this with a command line "rdt=" option. In this case limit an under-resourced event group's number of possible monitor resource groups to the lowest number of "hardware counters". Scan all enabled event groups and assign the RDT_RESOURCE_PERF_PKG resource "num_rmid" value to the smallest of these values as this value will be used later to compare against the number of RMIDs supported by other resources to determine how many monitoring resource groups are supported. N.B. Change type of resctrl_mon::num_rmid to u32 to match its usage and the type of event_group::num_rmid so that min(r->num_rmid, e->num_rmid) won't complain about mixing signed and unsigned types. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/20251217172121.12030-1-tony.luck@intel.com --- arch/x86/kernel/cpu/resctrl/intel_aet.c | 53 ++++++++++++++++++++++++++++++++- fs/resctrl/rdtgroup.c | 2 +- include/linux/resctrl.h | 2 +- 3 files changed, 54 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c index dc25e8d2527d..aba997135003 100644 --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -60,10 +61,14 @@ struct pmt_event { * Valid if the system supports the event group, * NULL otherwise. * @force_off: True when "rdt" command line or architecture code disables - * this event group. + * this event group due to insufficient RMIDs. * @force_on: True when "rdt" command line overrides disable of this * event group. * @guid: Unique number per XML description file. + * @num_rmid: Number of RMIDs supported by this group. May be + * adjusted downwards if enumeration from + * intel_pmt_get_regions_by_feature() indicates fewer + * RMIDs can be tracked simultaneously. * @mmio_size: Number of bytes of MMIO registers for this group. * @num_events: Number of events in this group. * @evts: Array of event descriptors. @@ -76,6 +81,7 @@ struct event_group { /* Remaining fields initialized from XML file. 
*/ u32 guid; + u32 num_rmid; size_t mmio_size; unsigned int num_events; struct pmt_event evts[] __counted_by(num_events); @@ -90,6 +96,7 @@ struct event_group { static struct event_group energy_0x26696143 = { .pfname = "energy", .guid = 0x26696143, + .num_rmid = 576, .mmio_size = XML_MMIO_SIZE(576, 2, 3), .num_events = 2, .evts = { @@ -104,6 +111,7 @@ static struct event_group energy_0x26696143 = { static struct event_group perf_0x26557651 = { .pfname = "perf", .guid = 0x26557651, + .num_rmid = 576, .mmio_size = XML_MMIO_SIZE(576, 7, 3), .num_events = 7, .evts = { @@ -198,6 +206,23 @@ static bool group_has_usable_regions(struct event_group *e, struct pmt_feature_g return usable_regions; } +static bool all_regions_have_sufficient_rmid(struct event_group *e, struct pmt_feature_group *p) +{ + struct telemetry_region *tr; + + for (int i = 0; i < p->count; i++) { + if (!p->regions[i].addr) + continue; + tr = &p->regions[i]; + if (tr->num_rmids < e->num_rmid) { + e->force_off = true; + return false; + } + } + + return true; +} + static bool enable_events(struct event_group *e, struct pmt_feature_group *p) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl; @@ -209,6 +234,27 @@ static bool enable_events(struct event_group *e, struct pmt_feature_group *p) if (!group_has_usable_regions(e, p)) return false; + /* + * Only enable event group with insufficient RMIDs if the user requested + * it from the kernel command line. + */ + if (!all_regions_have_sufficient_rmid(e, p) && !e->force_on) { + pr_info("%s %s:0x%x monitoring not enabled due to insufficient RMIDs\n", + r->name, e->pfname, e->guid); + return false; + } + + for (int i = 0; i < p->count; i++) { + if (!p->regions[i].addr) + continue; + /* + * e->num_rmid only adjusted lower if user (via rdt= kernel + * parameter) forces an event group with insufficient RMID + * to be enabled. + */ + e->num_rmid = min(e->num_rmid, p->regions[i].num_rmids); + } + for (int j = 0; j < e->num_events; j++) { if (!resctrl_enable_mon_event(e->evts[j].id, true, e->evts[j].bin_bits, &e->evts[j])) @@ -219,6 +265,11 @@ static bool enable_events(struct event_group *e, struct pmt_feature_group *p) return false; } + if (r->mon.num_rmid) + r->mon.num_rmid = min(r->mon.num_rmid, e->num_rmid); + else + r->mon.num_rmid = e->num_rmid; + return true; } diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index b9363a938f74..90c4a199a288 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1158,7 +1158,7 @@ static int rdt_num_rmids_show(struct kernfs_open_file *of, { struct rdt_resource *r = rdt_kn_parent_priv(of->kn); - seq_printf(seq, "%d\n", r->mon.num_rmid); + seq_printf(seq, "%u\n", r->mon.num_rmid); return 0; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 451eb45e44f8..006e57fd7ca5 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -295,7 +295,7 @@ enum resctrl_schema_fmt { * events of monitor groups created via mkdir. */ struct resctrl_mon { - int num_rmid; + u32 num_rmid; unsigned int mbm_cfg_mask; int num_mbm_cntrs; bool mbm_cntr_assignable; -- cgit v1.2.3
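The net effect of the num_rmid handling above: the PERF_PKG resource advertises the smallest limit across all enabled event groups. A stand-alone illustration of that folding (576 matches the Clearwater Forest aggregators; 432 is a hypothetical under-resourced group):

	#include <assert.h>
	#include <stdint.h>

	/*
	 * Mirrors the clamp at the end of enable_events(): the first enabled
	 * group sets the resource-wide limit, later groups can only lower it.
	 */
	static uint32_t fold_num_rmid(uint32_t res_num_rmid, uint32_t grp_num_rmid)
	{
		if (!res_num_rmid)
			return grp_num_rmid;
		return res_num_rmid < grp_num_rmid ? res_num_rmid : grp_num_rmid;
	}

	int main(void)
	{
		uint32_t n = 0;

		n = fold_num_rmid(n, 576);	/* "energy" event group */
		n = fold_num_rmid(n, 432);	/* hypothetical smaller group */
		assert(n == 432);
		return 0;
	}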