-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt |   2
-rw-r--r--  Documentation/filesystems/resctrl.rst           | 134
-rw-r--r--  arch/x86/include/asm/cpufeatures.h              |   2
-rw-r--r--  arch/x86/kernel/cpu/cpuid-deps.c                |   1
-rw-r--r--  arch/x86/kernel/cpu/resctrl/core.c              |   9
-rw-r--r--  arch/x86/kernel/cpu/resctrl/ctrlmondata.c       |  40
-rw-r--r--  arch/x86/kernel/cpu/resctrl/internal.h          |   5
-rw-r--r--  arch/x86/kernel/cpu/resctrl/monitor.c           |   1
-rw-r--r--  arch/x86/kernel/cpu/scattered.c                 |   1
-rw-r--r--  fs/resctrl/ctrlmondata.c                        | 309
-rw-r--r--  fs/resctrl/internal.h                           |  17
-rw-r--r--  fs/resctrl/rdtgroup.c                           |  82
-rw-r--r--  include/linux/resctrl.h                         |  24
13 files changed, 580 insertions(+), 47 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e63827475792..8c5636a120ee 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6207,7 +6207,7 @@
rdt= [HW,X86,RDT]
Turn on/off individual RDT features. List is:
cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp,
- mba, smba, bmec, abmc.
+ mba, smba, bmec, abmc, sdciae.
E.g. to turn on cmt and turn off mba use:
rdt=cmt,!mba
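
The same "!" negation shown in that example applies to the new flag; for
instance, to keep SDCIAE (and with it resctrl's "io_alloc" interface)
disabled even on capable hardware, boot with::

	rdt=!sdciae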
diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst
index b7f35b07876a..8c8ce678148a 100644
--- a/Documentation/filesystems/resctrl.rst
+++ b/Documentation/filesystems/resctrl.rst
@@ -17,17 +17,18 @@ AMD refers to this feature as AMD Platform Quality of Service(AMD QoS).
This feature is enabled by the CONFIG_X86_CPU_RESCTRL and the x86 /proc/cpuinfo
flag bits:
-=============================================== ================================
-RDT (Resource Director Technology) Allocation "rdt_a"
-CAT (Cache Allocation Technology) "cat_l3", "cat_l2"
-CDP (Code and Data Prioritization) "cdp_l3", "cdp_l2"
-CQM (Cache QoS Monitoring) "cqm_llc", "cqm_occup_llc"
-MBM (Memory Bandwidth Monitoring) "cqm_mbm_total", "cqm_mbm_local"
-MBA (Memory Bandwidth Allocation) "mba"
-SMBA (Slow Memory Bandwidth Allocation) ""
-BMEC (Bandwidth Monitoring Event Configuration) ""
-ABMC (Assignable Bandwidth Monitoring Counters) ""
-=============================================== ================================
+=============================================================== ================================
+RDT (Resource Director Technology) Allocation "rdt_a"
+CAT (Cache Allocation Technology) "cat_l3", "cat_l2"
+CDP (Code and Data Prioritization) "cdp_l3", "cdp_l2"
+CQM (Cache QoS Monitoring) "cqm_llc", "cqm_occup_llc"
+MBM (Memory Bandwidth Monitoring) "cqm_mbm_total", "cqm_mbm_local"
+MBA (Memory Bandwidth Allocation) "mba"
+SMBA (Slow Memory Bandwidth Allocation) ""
+BMEC (Bandwidth Monitoring Event Configuration) ""
+ABMC (Assignable Bandwidth Monitoring Counters) ""
+SDCIAE (Smart Data Cache Injection Allocation Enforcement) ""
+=============================================================== ================================
Historically, new features were made visible by default in /proc/cpuinfo. This
resulted in the feature flags becoming hard to parse by humans. Adding a new
@@ -72,6 +73,11 @@ The 'info' directory contains information about the enabled
resources. Each resource has its own subdirectory. The subdirectory
names reflect the resource names.
+Most of the files in the resource's subdirectory are read-only, and
+describe properties of the resource. Resources that support global
+configuration options also include writable files that can be used
+to modify those settings.
+
Each subdirectory contains the following files with respect to
allocation:
@@ -90,12 +96,19 @@ related to allocation:
must be set when writing a mask.
"shareable_bits":
- Bitmask of shareable resource with other executing
- entities (e.g. I/O). User can use this when
- setting up exclusive cache partitions. Note that
- some platforms support devices that have their
- own settings for cache use which can over-ride
- these bits.
+ Bitmask of shareable resource with other executing entities
+ (e.g. I/O). Applies to all instances of this resource. User
+ can use this when setting up exclusive cache partitions.
+ Note that some platforms support devices that have their
+ own settings for cache use which can over-ride these bits.
+
+ When "io_alloc" is enabled, a portion of each cache instance can
+ be configured for shared use between hardware and software.
+ "bit_usage" should be used to see which portions of each cache
+ instance is configured for hardware use via "io_alloc" feature
+ because every cache instance can have its "io_alloc" bitmask
+ configured independently via "io_alloc_cbm".
+
"bit_usage":
Annotated capacity bitmasks showing how all
instances of the resource are used. The legend is:
@@ -109,16 +122,16 @@ related to allocation:
"H":
Corresponding region is used by hardware only
but available for software use. If a resource
- has bits set in "shareable_bits" but not all
- of these bits appear in the resource groups'
- schematas then the bits appearing in
- "shareable_bits" but no resource group will
- be marked as "H".
+ has bits set in "shareable_bits" or "io_alloc_cbm"
+ but not all of these bits appear in the resource
+ groups' schemata then the bits appearing in
+ "shareable_bits" or "io_alloc_cbm" but no
+ resource group will be marked as "H".
"X":
Corresponding region is available for sharing and
- used by hardware and software. These are the
- bits that appear in "shareable_bits" as
- well as a resource group's allocation.
+ used by hardware and software. These are the bits
+ that appear in "shareable_bits" or "io_alloc_cbm"
+ as well as a resource group's allocation.
"S":
Corresponding region is used by software
and available for sharing.
@@ -136,6 +149,77 @@ related to allocation:
"1":
Non-contiguous 1s value in CBM is supported.
+"io_alloc":
+ "io_alloc" enables system software to configure the portion of
+ the cache allocated for I/O traffic. File may only exist if the
+ system supports this feature on some of its cache resources.
+
+ "disabled":
+ Resource supports "io_alloc" but the feature is disabled.
+ Portions of cache used for allocation of I/O traffic cannot
+ be configured.
+ "enabled":
+ Portions of cache used for allocation of I/O traffic
+ can be configured using "io_alloc_cbm".
+ "not supported":
+ Support not available for this resource.
+
+ The feature can be modified by writing to the interface, for example:
+
+ To enable::
+
+ # echo 1 > /sys/fs/resctrl/info/L3/io_alloc
+
+ To disable::
+
+ # echo 0 > /sys/fs/resctrl/info/L3/io_alloc
+
+ The underlying implementation may reduce resources available to
+ general (CPU) cache allocation. See the architecture-specific notes
+ below. The feature can be enabled or disabled according to usage
+ requirements.
+
+ On AMD systems, the io_alloc feature is supported by the L3 Smart
+ Data Cache Injection Allocation Enforcement (SDCIAE). The CLOSID for
+ io_alloc is the highest CLOSID supported by the resource. When
+ io_alloc is enabled, the highest CLOSID is dedicated to io_alloc and
+ no longer available for general (CPU) cache allocation. When CDP is
+ enabled, io_alloc routes I/O traffic using the highest CLOSID allocated
+ for the instruction cache (CDP_CODE), making this CLOSID no longer
+ available for general (CPU) cache allocation for both the CDP_CODE
+ and CDP_DATA resources.
+
+"io_alloc_cbm":
+ Capacity bitmasks that describe the portions of cache instances to
+ which I/O traffic from supported I/O devices is routed when "io_alloc"
+ is enabled.
+
+ CBMs are displayed in the following format:
+
+ <cache_id0>=<cbm>;<cache_id1>=<cbm>;...
+
+ Example::
+
+ # cat /sys/fs/resctrl/info/L3/io_alloc_cbm
+ 0=ffff;1=ffff
+
+ CBMs can be configured by writing to the interface.
+
+ Example::
+
+ # echo 1=ff > /sys/fs/resctrl/info/L3/io_alloc_cbm
+ # cat /sys/fs/resctrl/info/L3/io_alloc_cbm
+ 0=ffff;1=00ff
+
+ # echo "0=ff;1=f" > /sys/fs/resctrl/info/L3/io_alloc_cbm
+ # cat /sys/fs/resctrl/info/L3/io_alloc_cbm
+ 0=00ff;1=000f
+
+ When CDP is enabled "io_alloc_cbm" associated with the CDP_DATA and CDP_CODE
+ resources may reflect the same values. For example, values read from and
+ written to /sys/fs/resctrl/info/L3DATA/io_alloc_cbm may be reflected by
+ /sys/fs/resctrl/info/L3CODE/io_alloc_cbm and vice versa.
+
Memory bandwidth(MB) subdirectory contains the following files
with respect to allocation:
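
Putting the documented interfaces together, a minimal session on an
SDCIAE-capable system might look as follows. This is a sketch: the cache
ids and CBM widths are illustrative, and on a resource reporting e.g. 16
CLOSIDs the io_alloc CLOSID would be 15 (or 7 with CDP enabled, per the
AMD note above)::

	# cat /sys/fs/resctrl/info/L3/io_alloc
	disabled
	# echo 1 > /sys/fs/resctrl/info/L3/io_alloc
	# cat /sys/fs/resctrl/info/L3/io_alloc
	enabled
	# cat /sys/fs/resctrl/info/L3/io_alloc_cbm
	0=ffff;1=ffff
	# echo "0=00ff" > /sys/fs/resctrl/info/L3/io_alloc_cbm
	# cat /sys/fs/resctrl/info/L3/io_alloc_cbm
	0=00ff;1=ffff

Once enabled, the io_alloc bits surface in "bit_usage" as "H" or "X"
according to the legend above.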
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4091a776e37a..2abee3e9dc75 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -500,6 +500,8 @@
#define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */
#define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */
+#define X86_FEATURE_SDCIAE (21*32+18) /* L3 Smart Data Cache Injection Allocation Enforcement */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 46efcbd6afa4..87e78586395b 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -72,6 +72,7 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC },
{ X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_TOTAL },
{ X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_LOCAL },
+ { X86_FEATURE_SDCIAE, X86_FEATURE_CAT_L3 },
{ X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW },
{ X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES },
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 06ca5a30140c..3792ab4819dc 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -274,6 +274,11 @@ static void rdt_get_cdp_config(int level)
rdt_resources_all[level].r_resctrl.cdp_capable = true;
}
+static void rdt_set_io_alloc_capable(struct rdt_resource *r)
+{
+ r->cache.io_alloc_capable = true;
+}
+
static void rdt_get_cdp_l3_config(void)
{
rdt_get_cdp_config(RDT_RESOURCE_L3);
@@ -719,6 +724,7 @@ enum {
RDT_FLAG_SMBA,
RDT_FLAG_BMEC,
RDT_FLAG_ABMC,
+ RDT_FLAG_SDCIAE,
};
#define RDT_OPT(idx, n, f) \
@@ -745,6 +751,7 @@ static struct rdt_options rdt_options[] __ro_after_init = {
RDT_OPT(RDT_FLAG_SMBA, "smba", X86_FEATURE_SMBA),
RDT_OPT(RDT_FLAG_BMEC, "bmec", X86_FEATURE_BMEC),
RDT_OPT(RDT_FLAG_ABMC, "abmc", X86_FEATURE_ABMC),
+ RDT_OPT(RDT_FLAG_SDCIAE, "sdciae", X86_FEATURE_SDCIAE),
};
#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
@@ -853,6 +860,8 @@ static __init bool get_rdt_alloc_resources(void)
rdt_get_cache_alloc_cfg(1, r);
if (rdt_cpu_has(X86_FEATURE_CDP_L3))
rdt_get_cdp_l3_config();
+ if (rdt_cpu_has(X86_FEATURE_SDCIAE))
+ rdt_set_io_alloc_capable(r);
ret = true;
}
if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 1189c0df4ad7..b20e705606b8 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -91,3 +91,43 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
return hw_dom->ctrl_val[idx];
}
+
+bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r)
+{
+ return resctrl_to_arch_res(r)->sdciae_enabled;
+}
+
+static void resctrl_sdciae_set_one_amd(void *arg)
+{
+ bool *enable = arg;
+
+ if (*enable)
+ msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT);
+ else
+ msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT);
+}
+
+static void _resctrl_sdciae_enable(struct rdt_resource *r, bool enable)
+{
+ struct rdt_ctrl_domain *d;
+
+ /* Walking r->ctrl_domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
+ /* Update MSR_IA32_L3_QOS_EXT_CFG on all CPUs in all domains */
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list)
+ on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_sdciae_set_one_amd, &enable, 1);
+}
+
+int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable)
+{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+
+ if (hw_res->r_resctrl.cache.io_alloc_capable &&
+ hw_res->sdciae_enabled != enable) {
+ _resctrl_sdciae_enable(r, enable);
+ hw_res->sdciae_enabled = enable;
+ }
+
+ return 0;
+}
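
For orientation, the msr_set_bit()/msr_clear_bit() helpers used in
resctrl_sdciae_set_one_amd() perform a read-modify-write of the MSR on the
local CPU (and skip the write when the bit already holds the requested
value). A rough, non-authoritative expansion of the per-CPU callback, using
SDCIAE_ENABLE_BIT from the internal.h hunk below::

	static void sdciae_set_one_sketch(void *arg)
	{
		bool *enable = arg;
		u64 val;

		rdmsrl(MSR_IA32_L3_QOS_EXT_CFG, val);		/* read current config */
		if (*enable)
			val |= BIT_ULL(SDCIAE_ENABLE_BIT);	/* set bit 1 */
		else
			val &= ~BIT_ULL(SDCIAE_ENABLE_BIT);	/* clear bit 1 */
		wrmsrl(MSR_IA32_L3_QOS_EXT_CFG, val);		/* write it back */
	}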
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 9f4c2f0aaf5c..4a916c84a322 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -46,6 +46,9 @@ struct arch_mbm_state {
#define ABMC_EXTENDED_EVT_ID BIT(31)
#define ABMC_EVT_ID BIT(0)
+/* Setting bit 1 in MSR_IA32_L3_QOS_EXT_CFG enables the SDCIAE feature. */
+#define SDCIAE_ENABLE_BIT 1
+
/**
* struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
* a resource for a control function
@@ -112,6 +115,7 @@ struct msr_param {
* @mbm_width: Monitor width, to detect and correct for overflow.
* @cdp_enabled: CDP state of this resource
* @mbm_cntr_assign_enabled: ABMC feature is enabled
+ * @sdciae_enabled: SDCIAE feature (backing "io_alloc") is enabled.
*
* Members of this structure are either private to the architecture
* e.g. mbm_width, or accessed via helpers that provide abstraction. e.g.
@@ -126,6 +130,7 @@ struct rdt_hw_resource {
unsigned int mbm_width;
bool cdp_enabled;
bool mbm_cntr_assign_enabled;
+ bool sdciae_enabled;
};
static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r)
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index fe1a2aa53c16..dffcc8307500 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -361,6 +361,7 @@ static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0),
X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0),
X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0),
+ X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, 0),
{}
};
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index caa4dc885c21..d113863a8eab 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -53,6 +53,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
{ X86_FEATURE_ABMC, CPUID_EBX, 5, 0x80000020, 0 },
+ { X86_FEATURE_SDCIAE, CPUID_EBX, 6, 0x80000020, 0 },
{ X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 },
{ X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 },
{ X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 },
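
The entry above ties X86_FEATURE_SDCIAE to CPUID leaf 0x80000020, subleaf 0,
EBX bit 6. A hypothetical standalone userspace probe for the same bit (not
part of this patch), using the compiler-provided <cpuid.h>::

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* Leaf 0x80000020, subleaf 0: EBX bit 6 advertises SDCIAE. */
		if (!__get_cpuid_count(0x80000020, 0, &eax, &ebx, &ecx, &edx)) {
			puts("CPUID leaf 0x80000020 not available");
			return 1;
		}
		printf("SDCIAE: %ssupported\n", (ebx & (1u << 6)) ? "" : "not ");
		return 0;
	}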
diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
index 0d0ef54fc4de..b2d178d3556e 100644
--- a/fs/resctrl/ctrlmondata.c
+++ b/fs/resctrl/ctrlmondata.c
@@ -24,7 +24,8 @@
#include "internal.h"
struct rdt_parse_data {
- struct rdtgroup *rdtgrp;
+ u32 closid;
+ enum rdtgrp_mode mode;
char *buf;
};
@@ -77,8 +78,8 @@ static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
struct rdt_ctrl_domain *d)
{
struct resctrl_staged_config *cfg;
- u32 closid = data->rdtgrp->closid;
struct rdt_resource *r = s->res;
+ u32 closid = data->closid;
u32 bw_val;
cfg = &d->staged_config[s->conf_type];
@@ -156,9 +157,10 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
struct rdt_ctrl_domain *d)
{
- struct rdtgroup *rdtgrp = data->rdtgrp;
+ enum rdtgrp_mode mode = data->mode;
struct resctrl_staged_config *cfg;
struct rdt_resource *r = s->res;
+ u32 closid = data->closid;
u32 cbm_val;
cfg = &d->staged_config[s->conf_type];
@@ -171,7 +173,7 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
* Cannot set up more than one pseudo-locked region in a cache
* hierarchy.
*/
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
+ if (mode == RDT_MODE_PSEUDO_LOCKSETUP &&
rdtgroup_pseudo_locked_in_hierarchy(d)) {
rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n");
return -EINVAL;
@@ -180,8 +182,7 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
if (!cbm_validate(data->buf, &cbm_val, r))
return -EINVAL;
- if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
- rdtgrp->mode == RDT_MODE_SHAREABLE) &&
+ if ((mode == RDT_MODE_EXCLUSIVE || mode == RDT_MODE_SHAREABLE) &&
rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) {
rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n");
return -EINVAL;
@@ -191,14 +192,14 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
* The CBM may not overlap with the CBM of another closid if
* either is exclusive.
*/
- if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) {
+ if (rdtgroup_cbm_overlaps(s, d, cbm_val, closid, true)) {
rdt_last_cmd_puts("Overlaps with exclusive group\n");
return -EINVAL;
}
- if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) {
- if (rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
- rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ if (rdtgroup_cbm_overlaps(s, d, cbm_val, closid, false)) {
+ if (mode == RDT_MODE_EXCLUSIVE ||
+ mode == RDT_MODE_PSEUDO_LOCKSETUP) {
rdt_last_cmd_puts("Overlaps with other group\n");
return -EINVAL;
}
@@ -262,7 +263,8 @@ next:
list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
if (d->hdr.id == dom_id) {
data.buf = dom;
- data.rdtgrp = rdtgrp;
+ data.closid = rdtgrp->closid;
+ data.mode = rdtgrp->mode;
if (parse_ctrlval(&data, s, d))
return -EINVAL;
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
@@ -381,7 +383,8 @@ out:
return ret ?: nbytes;
}
-static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid)
+static void show_doms(struct seq_file *s, struct resctrl_schema *schema,
+ char *resource_name, int closid)
{
struct rdt_resource *r = schema->res;
struct rdt_ctrl_domain *dom;
@@ -391,7 +394,8 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
- seq_printf(s, "%*s:", max_name_width, schema->name);
+ if (resource_name)
+ seq_printf(s, "%*s:", max_name_width, resource_name);
list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
if (sep)
seq_puts(s, ";");
@@ -437,7 +441,7 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
closid = rdtgrp->closid;
list_for_each_entry(schema, &resctrl_schema_all, list) {
if (closid < schema->num_closid)
- show_doms(s, schema, closid);
+ show_doms(s, schema, schema->name, closid);
}
}
} else {
@@ -676,3 +680,280 @@ out:
rdtgroup_kn_unlock(of->kn);
return ret;
}
+
+int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r = s->res;
+
+ mutex_lock(&rdtgroup_mutex);
+
+ if (r->cache.io_alloc_capable) {
+ if (resctrl_arch_get_io_alloc_enabled(r))
+ seq_puts(seq, "enabled\n");
+ else
+ seq_puts(seq, "disabled\n");
+ } else {
+ seq_puts(seq, "not supported\n");
+ }
+
+ mutex_unlock(&rdtgroup_mutex);
+
+ return 0;
+}
+
+/*
+ * resctrl_io_alloc_closid_supported() - io_alloc feature utilizes the
+ * highest CLOSID value to direct I/O traffic. Ensure that io_alloc_closid
+ * is in the supported range.
+ */
+static bool resctrl_io_alloc_closid_supported(u32 io_alloc_closid)
+{
+ return io_alloc_closid < closids_supported();
+}
+
+/*
+ * Initialize io_alloc CLOSID cache resource CBM with all usable (shared
+ * and unused) cache portions.
+ */
+static int resctrl_io_alloc_init_cbm(struct resctrl_schema *s, u32 closid)
+{
+ enum resctrl_conf_type peer_type;
+ struct rdt_resource *r = s->res;
+ struct rdt_ctrl_domain *d;
+ int ret;
+
+ rdt_staged_configs_clear();
+
+ ret = rdtgroup_init_cat(s, closid);
+ if (ret < 0)
+ goto out;
+
+ /* Keep CDP_CODE and CDP_DATA of io_alloc CLOSID's CBM in sync. */
+ if (resctrl_arch_get_cdp_enabled(r->rid)) {
+ peer_type = resctrl_peer_type(s->conf_type);
+ list_for_each_entry(d, &s->res->ctrl_domains, hdr.list)
+ memcpy(&d->staged_config[peer_type],
+ &d->staged_config[s->conf_type],
+ sizeof(d->staged_config[0]));
+ }
+
+ ret = resctrl_arch_update_domains(r, closid);
+out:
+ rdt_staged_configs_clear();
+ return ret;
+}
+
+/*
+ * resctrl_io_alloc_closid() - io_alloc feature routes I/O traffic using
+ * the highest available CLOSID. Retrieve the maximum CLOSID supported by the
+ * resource. Note that if Code Data Prioritization (CDP) is enabled, the number
+ * of available CLOSIDs is reduced by half.
+ */
+u32 resctrl_io_alloc_closid(struct rdt_resource *r)
+{
+ if (resctrl_arch_get_cdp_enabled(r->rid))
+ return resctrl_arch_get_num_closid(r) / 2 - 1;
+ else
+ return resctrl_arch_get_num_closid(r) - 1;
+}
+
+ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r = s->res;
+ char const *grp_name;
+ u32 io_alloc_closid;
+ bool enable;
+ int ret;
+
+ ret = kstrtobool(buf, &enable);
+ if (ret)
+ return ret;
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ rdt_last_cmd_clear();
+
+ if (!r->cache.io_alloc_capable) {
+ rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name);
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ /* If the feature is already in the requested state, no action is needed. */
+ if (resctrl_arch_get_io_alloc_enabled(r) == enable)
+ goto out_unlock;
+
+ io_alloc_closid = resctrl_io_alloc_closid(r);
+ if (!resctrl_io_alloc_closid_supported(io_alloc_closid)) {
+ rdt_last_cmd_printf("io_alloc CLOSID (ctrl_hw_id) %u is not available\n",
+ io_alloc_closid);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (enable) {
+ if (!closid_alloc_fixed(io_alloc_closid)) {
+ grp_name = rdtgroup_name_by_closid(io_alloc_closid);
+ WARN_ON_ONCE(!grp_name);
+ rdt_last_cmd_printf("CLOSID (ctrl_hw_id) %u for io_alloc is used by %s group\n",
+ io_alloc_closid, grp_name ? grp_name : "another");
+ ret = -ENOSPC;
+ goto out_unlock;
+ }
+
+ ret = resctrl_io_alloc_init_cbm(s, io_alloc_closid);
+ if (ret) {
+ rdt_last_cmd_puts("Failed to initialize io_alloc allocations\n");
+ closid_free(io_alloc_closid);
+ goto out_unlock;
+ }
+ } else {
+ closid_free(io_alloc_closid);
+ }
+
+ ret = resctrl_arch_io_alloc_enable(r, enable);
+ if (enable && ret) {
+ rdt_last_cmd_puts("Failed to enable io_alloc feature\n");
+ closid_free(io_alloc_closid);
+ }
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+
+ return ret ?: nbytes;
+}
+
+int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r = s->res;
+ int ret = 0;
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ rdt_last_cmd_clear();
+
+ if (!r->cache.io_alloc_capable) {
+ rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name);
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ if (!resctrl_arch_get_io_alloc_enabled(r)) {
+ rdt_last_cmd_printf("io_alloc is not enabled on %s\n", s->name);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ /*
+ * When CDP is enabled, the CBMs of the highest CLOSID of CDP_CODE and
+ * CDP_DATA are kept in sync. As a result, the io_alloc CBMs shown for
+ * either CDP resource are identical and accurately represent the CBMs
+ * used for I/O.
+ */
+ show_doms(seq, s, NULL, resctrl_io_alloc_closid(r));
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+ return ret;
+}
+
+static int resctrl_io_alloc_parse_line(char *line, struct rdt_resource *r,
+ struct resctrl_schema *s, u32 closid)
+{
+ enum resctrl_conf_type peer_type;
+ struct rdt_parse_data data;
+ struct rdt_ctrl_domain *d;
+ char *dom = NULL, *id;
+ unsigned long dom_id;
+
+next:
+ if (!line || line[0] == '\0')
+ return 0;
+
+ dom = strsep(&line, ";");
+ id = strsep(&dom, "=");
+ if (!dom || kstrtoul(id, 10, &dom_id)) {
+ rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
+ return -EINVAL;
+ }
+
+ dom = strim(dom);
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ if (d->hdr.id == dom_id) {
+ data.buf = dom;
+ data.mode = RDT_MODE_SHAREABLE;
+ data.closid = closid;
+ if (parse_cbm(&data, s, d))
+ return -EINVAL;
+ /*
+ * Keep io_alloc CLOSID's CBM of CDP_CODE and CDP_DATA
+ * in sync.
+ */
+ if (resctrl_arch_get_cdp_enabled(r->rid)) {
+ peer_type = resctrl_peer_type(s->conf_type);
+ memcpy(&d->staged_config[peer_type],
+ &d->staged_config[s->conf_type],
+ sizeof(d->staged_config[0]));
+ }
+ goto next;
+ }
+ }
+
+ return -EINVAL;
+}
+
+ssize_t resctrl_io_alloc_cbm_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r = s->res;
+ u32 io_alloc_closid;
+ int ret = 0;
+
+ /* Valid input requires a trailing newline */
+ if (nbytes == 0 || buf[nbytes - 1] != '\n')
+ return -EINVAL;
+
+ buf[nbytes - 1] = '\0';
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+ rdt_last_cmd_clear();
+
+ if (!r->cache.io_alloc_capable) {
+ rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name);
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ if (!resctrl_arch_get_io_alloc_enabled(r)) {
+ rdt_last_cmd_printf("io_alloc is not enabled on %s\n", s->name);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ io_alloc_closid = resctrl_io_alloc_closid(r);
+
+ rdt_staged_configs_clear();
+ ret = resctrl_io_alloc_parse_line(buf, r, s, io_alloc_closid);
+ if (ret)
+ goto out_clear_configs;
+
+ ret = resctrl_arch_update_domains(r, io_alloc_closid);
+
+out_clear_configs:
+ rdt_staged_configs_clear();
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+
+ return ret ?: nbytes;
+}
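
resctrl_io_alloc_parse_line() above tokenizes writes such as "0=ff;1=f" with
the same strsep() idiom the rest of resctrl uses. A self-contained userspace
sketch of that splitting (hypothetical demo using glibc's strsep(); error
handling trimmed to the checks the kernel code makes)::

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	int main(void)
	{
		char buf[] = "0=ff;1=f";	/* as written to io_alloc_cbm */
		char *line = buf, *dom, *id;

		while (line && line[0] != '\0') {
			dom = strsep(&line, ";");	/* one "id=cbm" clause */
			id = strsep(&dom, "=");		/* split id from cbm */
			if (!dom) {
				fprintf(stderr, "Missing '='\n");
				return 1;
			}
			printf("domain %lu -> cbm 0x%lx\n",
			       strtoul(id, NULL, 10), strtoul(dom, NULL, 16));
		}
		return 0;
	}

Run against the string above it prints "domain 0 -> cbm 0xff" and
"domain 1 -> cbm 0xf", mirroring how each clause is staged for its matching
domain.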
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index cf1fd82dc5a9..bff4a54ae333 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -390,6 +390,8 @@ void rdt_staged_configs_clear(void);
bool closid_allocated(unsigned int closid);
+bool closid_alloc_fixed(u32 closid);
+
int resctrl_find_cleanest_closid(void);
void *rdt_kn_parent_priv(struct kernfs_node *kn);
@@ -426,6 +428,21 @@ int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, voi
ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
loff_t off);
+int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, void *v);
+
+int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid);
+
+enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type);
+
+ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off);
+
+const char *rdtgroup_name_by_closid(u32 closid);
+int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq,
+ void *v);
+ssize_t resctrl_io_alloc_cbm_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off);
+u32 resctrl_io_alloc_closid(struct rdt_resource *r);
#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index 0320360cd7a6..8e39dfda56bc 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -226,6 +226,11 @@ bool closid_allocated(unsigned int closid)
return !test_bit(closid, closid_free_map);
}
+bool closid_alloc_fixed(u32 closid)
+{
+ return __test_and_clear_bit(closid, closid_free_map);
+}
+
/**
* rdtgroup_mode_by_closid - Return mode of resource group with closid
* @closid: closid of the resource group
@@ -1057,15 +1062,17 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
- hw_shareable = r->cache.shareable_bits;
list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
if (sep)
seq_putc(seq, ';');
+ hw_shareable = r->cache.shareable_bits;
sw_shareable = 0;
exclusive = 0;
seq_printf(seq, "%d=", dom->hdr.id);
for (i = 0; i < closids_supported(); i++) {
- if (!closid_allocated(i))
+ if (!closid_allocated(i) ||
+ (resctrl_arch_get_io_alloc_enabled(r) &&
+ i == resctrl_io_alloc_closid(r)))
continue;
ctrl_val = resctrl_arch_get_config(r, dom, i,
s->conf_type);
@@ -1093,6 +1100,21 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
break;
}
}
+
+ /*
+ * When the "io_alloc" feature is enabled, a portion of the cache
+ * is configured for shared use between hardware and software.
+ * Also, when CDP is enabled the CBMs of CDP_CODE and CDP_DATA
+ * resources are kept in sync. So, the CBMs for "io_alloc" can
+ * be accessed through either resource.
+ */
+ if (resctrl_arch_get_io_alloc_enabled(r)) {
+ ctrl_val = resctrl_arch_get_config(r, dom,
+ resctrl_io_alloc_closid(r),
+ s->conf_type);
+ hw_shareable |= ctrl_val;
+ }
+
for (i = r->cache.cbm_len - 1; i >= 0; i--) {
pseudo_locked = dom->plr ? dom->plr->cbm : 0;
hwb = test_bit(i, &hw_shareable);
@@ -1247,7 +1269,7 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
return 0;
}
-static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
+enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
{
switch (my_type) {
case CDP_CODE:
@@ -1838,6 +1860,18 @@ void resctrl_bmec_files_show(struct rdt_resource *r, struct kernfs_node *l3_mon_
kernfs_put(mon_kn);
}
+const char *rdtgroup_name_by_closid(u32 closid)
+{
+ struct rdtgroup *rdtgrp;
+
+ list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
+ if (rdtgrp->closid == closid)
+ return rdt_kn_name(rdtgrp->kn);
+ }
+
+ return NULL;
+}
+
/* rdtgroup information files for one cache resource. */
static struct rftype res_common_files[] = {
{
@@ -1948,6 +1982,20 @@ static struct rftype res_common_files[] = {
.seq_show = rdt_thread_throttle_mode_show,
},
{
+ .name = "io_alloc",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = resctrl_io_alloc_show,
+ .write = resctrl_io_alloc_write,
+ },
+ {
+ .name = "io_alloc_cbm",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = resctrl_io_alloc_cbm_show,
+ .write = resctrl_io_alloc_cbm_write,
+ },
+ {
.name = "max_threshold_occupancy",
.mode = 0644,
.kf_ops = &rdtgroup_kf_single_ops,
@@ -2138,6 +2186,23 @@ static void thread_throttle_mode_init(void)
RFTYPE_CTRL_INFO | RFTYPE_RES_MB);
}
+/*
+ * The resctrl file "io_alloc" is added using the L3 resource. However, this
+ * results in the file being visible for *all* cache resources (e.g. L2),
+ * whether they support "io_alloc" or not.
+ */
+static void io_alloc_init(void)
+{
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+
+ if (r->cache.io_alloc_capable) {
+ resctrl_file_fflags_init("io_alloc", RFTYPE_CTRL_INFO |
+ RFTYPE_RES_CACHE);
+ resctrl_file_fflags_init("io_alloc_cbm",
+ RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE);
+ }
+}
+
void resctrl_file_fflags_init(const char *config, unsigned long fflags)
{
struct rftype *rft;
@@ -3383,11 +3448,12 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
{
unsigned int cbm_len = r->cache.cbm_len;
unsigned long first_bit, zero_bit;
- unsigned long val = _val;
+ unsigned long val;
- if (!val)
- return 0;
+ if (!_val || r->cache.arch_has_sparse_bitmasks)
+ return _val;
+ val = _val;
first_bit = find_first_bit(&val, cbm_len);
zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
@@ -3480,7 +3546,7 @@ static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schem
* If there are no more shareable bits available on any domain then
* the entire allocation will fail.
*/
-static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
+int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
{
struct rdt_ctrl_domain *d;
int ret;
@@ -4408,6 +4474,8 @@ int resctrl_init(void)
thread_throttle_mode_init();
+ io_alloc_init();
+
ret = resctrl_mon_resource_init();
if (ret)
return ret;
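
One behavioral note on the rdtgroup.c hunk changing cbm_ensure_valid():
non-contiguous masks are now returned untouched whenever the architecture
supports sparse bitmasks; otherwise the helper still truncates the input to
its first contiguous run of set bits. A hypothetical standalone rendition of
that fix-up (same find-first-bit/find-next-zero-bit logic, 32-bit masks for
brevity)::

	#include <stdio.h>

	static unsigned int cbm_fixup(unsigned int val, unsigned int cbm_len,
				      int sparse_ok)
	{
		unsigned int first_bit, zero_bit;

		if (!val || sparse_ok)
			return val;		/* e.g. 0xf00f stays 0xf00f */

		first_bit = __builtin_ctz(val);	/* lowest set bit */
		zero_bit = first_bit;
		while (zero_bit < cbm_len && ((val >> zero_bit) & 1))
			zero_bit++;		/* next clear bit */

		/* Keep only the first contiguous run: 0xf00f -> 0x000f. */
		return ((1u << (zero_bit - first_bit)) - 1) << first_bit;
	}

	int main(void)
	{
		printf("0x%x\n", cbm_fixup(0xf00f, 16, 0));	/* 0xf */
		printf("0x%x\n", cbm_fixup(0xf00f, 16, 1));	/* 0xf00f */
		return 0;
	}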
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index a7d92718b653..54701668b3df 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -206,6 +206,8 @@ struct rdt_mon_domain {
* @arch_has_sparse_bitmasks: True if a bitmask like f00f is valid.
* @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache
* level has CPU scope.
+ * @io_alloc_capable: True if a portion of the cache can be configured
+ * for I/O traffic.
*/
struct resctrl_cache {
unsigned int cbm_len;
@@ -213,6 +215,7 @@ struct resctrl_cache {
unsigned int shareable_bits;
bool arch_has_sparse_bitmasks;
bool arch_has_per_cpu_cfg;
+ bool io_alloc_capable;
};
/**
@@ -654,6 +657,27 @@ void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 closid, u32 rmid, int cntr_id,
enum resctrl_event_id eventid);
+/**
+ * resctrl_arch_io_alloc_enable() - Enable/disable io_alloc feature.
+ * @r: The resctrl resource.
+ * @enable: Enable (true) or disable (false) io_alloc on resource @r.
+ *
+ * This can be called from any CPU.
+ *
+ * Return:
+ * 0 on success, <0 on error.
+ */
+int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable);
+
+/**
+ * resctrl_arch_get_io_alloc_enabled() - Get io_alloc feature state.
+ * @r: The resctrl resource.
+ *
+ * Return:
+ * true if io_alloc is enabled or false if disabled.
+ */
+bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r);
+
extern unsigned int resctrl_rmid_realloc_threshold;
extern unsigned int resctrl_rmid_realloc_limit;
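
Read together with the fs/resctrl changes earlier in the patch, the contract
of these two hooks is simple: query the cached state, and flip it only when
the resource is capable and the state actually differs. A hypothetical
condensation (this restates resctrl_io_alloc_write()'s core flow; the real
function additionally claims the CLOSID and initializes the io_alloc CBMs)::

	static int io_alloc_set_state(struct rdt_resource *r, bool enable)
	{
		if (!r->cache.io_alloc_capable)
			return -ENODEV;		/* "io_alloc" reads "not supported" */

		if (resctrl_arch_get_io_alloc_enabled(r) == enable)
			return 0;		/* already in the requested state */

		return resctrl_arch_io_alloc_enable(r, enable);
	}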