From 9e69bafece43dcefec864f00b3ec7e088aa7fcbc Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 18 Sep 2025 11:22:05 +0200 Subject: drm/xe: Don't copy pinned kernel bos twice on suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were copying the content of the bos on the list "xe->pinned.late.kernel_bo_present" twice on suspend. Presumably the intent was to copy the pinned external bos on the first pass. This is harmless since we (currently) should have no pinned external bos needing a copy, since a) external system bos don't have compressed content, and b) we do not (yet) allow pinning of VRAM bos. Still, fix this up so that we copy pinned external bos on the first pass, since we're about to allow bos pinned in VRAM. Fixes: c6a4d46ec1d7 ("drm/xe: evict user memory in PM notifier") Cc: Matthew Auld Cc: <stable@vger.kernel.org> # v6.16+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250918092207.54472-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_bo_evict.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 7484ce55a303..d5dbc51e8612 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -158,8 +158,8 @@ int xe_bo_evict_all(struct xe_device *xe) if (ret) return ret; - ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, - &xe->pinned.late.evicted, xe_bo_evict_pinned); + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, xe_bo_evict_pinned); if (!ret) ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, -- cgit v1.2.3 From 8b3dfa6fcf2603ef24cd501433f26f5d184d3732 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 18 Sep 2025 11:22:06 +0200 Subject: drm/xe: Pre-allocate system memory for pinned external bos in the pm notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similarly to what we do for other pinned bos, pre-allocate system memory for pinned external bos in the pm notifier, where swapping is still possible. This hasn't been needed until now, but we're about to allow pinning of external VRAM bos.
Cc: Matthew Auld Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250918092207.54472-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_bo_evict.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index d5dbc51e8612..1a12675b2ea9 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -73,6 +73,11 @@ int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe) &xe->pinned.late.kernel_bo_present, xe_bo_notifier_prepare_pinned); + if (!ret) + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, + xe_bo_notifier_prepare_pinned); + return ret; } @@ -93,6 +98,10 @@ void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe) (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, &xe->pinned.late.kernel_bo_present, xe_bo_notifier_unprepare_pinned); + + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, + xe_bo_notifier_unprepare_pinned); } /** -- cgit v1.2.3 From df636bf2836644667c632864e25878bd928154f7 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 18 Sep 2025 11:22:07 +0200 Subject: drm/xe/dma-buf: Allow pinning of p2p dma-buf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RDMA NICs typically require VRAM dma-bufs to be pinned in VRAM for pcie-p2p communication, since they don't fully support the move_notify() scheme. We would like to support that. However, allowing unaccounted pinning of VRAM creates a DoS vector, so up until now we haven't allowed it. With cgroups support in TTM, though, the amount of VRAM allocated to a cgroup can be limited, and since pinned memory is also accounted as allocated VRAM, we should be safe. An analogy with system memory can be made if we observe the similarity with kernel system memory that is allocated as the result of user-space action and that is accounted using __GFP_ACCOUNT. Ideally, to be more flexible, we would add a "pinned_memory", or possibly "kernel_memory", limit to the dmem cgroups controller that would additionally limit the memory that is pinned in this way. If we let that limit default to the dmem::max limit, we can introduce that without needing to care about regressions. Considering that we already pin VRAM in this way for at least page-table memory and LRC memory, and the above path to greater flexibility, allow this also for dma-bufs.
v2: - Update comments about pinning in the dma-buf kunit test (Niranjana Vishwanathapura) Cc: Dave Airlie Cc: Simona Vetter Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: Matthew Brost Cc: Rodrigo Vivi Cc: Lucas De Marchi Cc: Niranjana Vishwanathapura Signed-off-by: Thomas Hellström Acked-by: Simona Vetter Reviewed-by: Maarten Lankhorst Link: https://lore.kernel.org/r/20250918092207.54472-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/tests/xe_dma_buf.c | 17 +++++++++++++-- drivers/gpu/drm/xe/xe_dma_buf.c | 41 ++++++++++++++++++++++------------- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index a7e548a2bdfb..5df98de5ba3c 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -31,6 +31,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, struct drm_exec *exec) { struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); + struct dma_buf_attachment *attach; u32 mem_type; int ret; @@ -46,7 +47,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, mem_type = XE_PL_TT; else if (params->force_different_devices && !is_dynamic(params) && (params->mem_mask & XE_BO_FLAG_SYSTEM)) - /* Pin migrated to TT */ + /* Pin migrated to TT on non-dynamic attachments. */ mem_type = XE_PL_TT; if (!xe_bo_is_mem_type(exported, mem_type)) { @@ -88,6 +89,18 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); + /* Check that we can pin without migrating. */ + attach = list_first_entry_or_null(&dmabuf->attachments, typeof(*attach), node); + if (attach) { + int err = dma_buf_pin(attach); + + if (!err) { + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); + dma_buf_unpin(attach); + } + KUNIT_EXPECT_EQ(test, err, 0); + } + if (params->force_different_devices) KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT)); else @@ -150,7 +163,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) xe_bo_lock(import_bo, false); err = xe_bo_validate(import_bo, NULL, false, exec); - /* Pinning in VRAM is not allowed. */ + /* Pinning in VRAM is not allowed for non-dynamic attachments */ if (!is_dynamic(params) && params->force_different_devices && !(params->mem_mask & XE_BO_FLAG_SYSTEM)) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index a7d67725c3ee..54e42960daad 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -48,32 +48,43 @@ static void xe_dma_buf_detach(struct dma_buf *dmabuf, static int xe_dma_buf_pin(struct dma_buf_attachment *attach) { - struct drm_gem_object *obj = attach->dmabuf->priv; + struct dma_buf *dmabuf = attach->dmabuf; + struct drm_gem_object *obj = dmabuf->priv; struct xe_bo *bo = gem_to_xe_bo(obj); struct xe_device *xe = xe_bo_device(bo); struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED; + bool allow_vram = true; int ret; - /* - * For now only support pinning in TT memory, for two reasons: - * 1) Avoid pinning in a placement not accessible to some importers. - * 2) Pinning in VRAM requires PIN accounting which is a to-do. 
- */ - if (xe_bo_is_pinned(bo) && !xe_bo_is_mem_type(bo, XE_PL_TT)) { + if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) { + allow_vram = false; + } else { + list_for_each_entry(attach, &dmabuf->attachments, node) { + if (!attach->peer2peer) { + allow_vram = false; + break; + } + } + } + + if (xe_bo_is_pinned(bo) && !xe_bo_is_mem_type(bo, XE_PL_TT) && + !(xe_bo_is_vram(bo) && allow_vram)) { drm_dbg(&xe->drm, "Can't migrate pinned bo for dma-buf pin.\n"); return -EINVAL; } - ret = xe_bo_migrate(bo, XE_PL_TT, NULL, exec); - if (ret) { - if (ret != -EINTR && ret != -ERESTARTSYS) - drm_dbg(&xe->drm, - "Failed migrating dma-buf to TT memory: %pe\n", - ERR_PTR(ret)); - return ret; + if (!allow_vram) { + ret = xe_bo_migrate(bo, XE_PL_TT, NULL, exec); + if (ret) { + if (ret != -EINTR && ret != -ERESTARTSYS) + drm_dbg(&xe->drm, + "Failed migrating dma-buf to TT memory: %pe\n", + ERR_PTR(ret)); + return ret; + } } - ret = xe_bo_pin_external(bo, true, exec); + ret = xe_bo_pin_external(bo, !allow_vram, exec); xe_assert(xe, !ret); return 0; -- cgit v1.2.3 From 1364a9ead45f62a49533331687dc894f5a057cfe Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 18 Sep 2025 16:28:47 +0200 Subject: drm/xe/pm: Hold the validation lock around evicting user-space bos for suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During pm notifier eviction we may still race with validations. Block those out during eviction to ensure we have access to as much system memory as possible. During the suspend operation itself, we run single-threaded, so that shouldn't be a problem. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250918142848.21807-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_pm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index d6625c71115b..1ec03ef19d9e 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -329,9 +329,15 @@ static int xe_pm_notifier_callback(struct notifier_block *nb, switch (action) { case PM_HIBERNATION_PREPARE: case PM_SUSPEND_PREPARE: + { + struct xe_validation_ctx ctx; + reinit_completion(&xe->pm_block); xe_pm_runtime_get(xe); + (void)xe_validation_ctx_init(&ctx, &xe->val, NULL, + (struct xe_val_flags) {.exclusive = true}); err = xe_bo_evict_all_user(xe); + xe_validation_ctx_fini(&ctx); if (err) drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err); @@ -344,6 +350,7 @@ static int xe_pm_notifier_callback(struct notifier_block *nb, * allocations. */ break; + } case PM_POST_HIBERNATION: case PM_POST_SUSPEND: complete_all(&xe->pm_block); -- cgit v1.2.3 From f73f6dd312a5616a8d97860663f6c88ffba43ad4 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 18 Sep 2025 16:28:48 +0200 Subject: drm/xe/pm: Add lockdep annotation for the pm_block completion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to how we annotate dma-fences, add a lockdep annotation to the pm_block completion to ensure we don't wait for it while holding locks that are needed in the pm notifier or in the device suspend / resume callbacks.
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250918142848.21807-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_exec.c | 3 ++- drivers/gpu/drm/xe/xe_pm.c | 59 ++++++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_pm.h | 2 ++ drivers/gpu/drm/xe/xe_vm.c | 2 ++ 4 files changed, 65 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 7715e74bb945..83897950f0da 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -16,6 +16,7 @@ #include "xe_exec_queue.h" #include "xe_hw_engine_group.h" #include "xe_macros.h" +#include "xe_pm.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_sync.h" @@ -247,7 +248,7 @@ retry: * on task freezing during suspend / hibernate, the call will * return -ERESTARTSYS and the IOCTL will be rerun. */ - err = wait_for_completion_interruptible(&xe->pm_block); + err = xe_pm_block_on_suspend(xe); if (err) goto err_unlock_list; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 1ec03ef19d9e..96afa49f0b4b 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -83,8 +83,58 @@ static struct lockdep_map xe_pm_runtime_d3cold_map = { static struct lockdep_map xe_pm_runtime_nod3cold_map = { .name = "xe_rpm_nod3cold_map" }; + +static struct lockdep_map xe_pm_block_lockdep_map = { + .name = "xe_pm_block_map", +}; #endif +static void xe_pm_block_begin_signalling(void) +{ + lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_); +} + +static void xe_pm_block_end_signalling(void) +{ + lock_release(&xe_pm_block_lockdep_map, _RET_IP_); +} + +/** + * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend + * + * Annotation to use where the code might block or cease to make + * progress pending resume completion. */ +void xe_pm_might_block_on_suspend(void) +{ + lock_map_acquire(&xe_pm_block_lockdep_map); + lock_map_release(&xe_pm_block_lockdep_map); +} + +/** + * xe_pm_block_on_suspend() - Block pending suspend. + * @xe: The xe device about to be suspended. + * + * Block if the pm notifier has started evicting bos, to avoid + * racing and validating those bos back. The function is + * annotated to ensure no locks are held that are also grabbed + * in the pm notifier or the device suspend / resume. + * This is intended to be used by freezable tasks only + * (not freezable workqueues), with the intention that the function + * returns %-ERESTARTSYS when tasks are frozen during suspend, + * and allows the task to freeze. The caller must be able to + * handle the %-ERESTARTSYS. + * + * Return: %0 on success, %-ERESTARTSYS on signal pending or + * if freezing requested. + */ +int xe_pm_block_on_suspend(struct xe_device *xe) +{ + xe_pm_might_block_on_suspend(); + + return wait_for_completion_interruptible(&xe->pm_block); +} + /** * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context * @xe: The xe device.
@@ -124,6 +174,7 @@ int xe_pm_suspend(struct xe_device *xe) int err; drm_dbg(&xe->drm, "Suspending device\n"); + xe_pm_block_begin_signalling(); trace_xe_pm_suspend(xe, __builtin_return_address(0)); err = xe_pxp_pm_suspend(xe->pxp); @@ -155,6 +206,8 @@ int xe_pm_suspend(struct xe_device *xe) xe_i2c_pm_suspend(xe); drm_dbg(&xe->drm, "Device suspended\n"); + xe_pm_block_end_signalling(); + return 0; err_display: @@ -162,6 +215,7 @@ err_display: xe_pxp_pm_resume(xe->pxp); err: drm_dbg(&xe->drm, "Device suspend failed %d\n", err); + xe_pm_block_end_signalling(); return err; } @@ -178,6 +232,7 @@ int xe_pm_resume(struct xe_device *xe) u8 id; int err; + xe_pm_block_begin_signalling(); drm_dbg(&xe->drm, "Resuming device\n"); trace_xe_pm_resume(xe, __builtin_return_address(0)); @@ -222,9 +277,11 @@ int xe_pm_resume(struct xe_device *xe) xe_late_bind_fw_load(&xe->late_bind); drm_dbg(&xe->drm, "Device resumed\n"); + xe_pm_block_end_signalling(); return 0; err: drm_dbg(&xe->drm, "Device resume failed %d\n", err); + xe_pm_block_end_signalling(); return err; } @@ -333,6 +390,7 @@ static int xe_pm_notifier_callback(struct notifier_block *nb, struct xe_validation_ctx ctx; reinit_completion(&xe->pm_block); + xe_pm_block_begin_signalling(); xe_pm_runtime_get(xe); (void)xe_validation_ctx_init(&ctx, &xe->val, NULL, (struct xe_val_flags) {.exclusive = true}); @@ -349,6 +407,7 @@ static int xe_pm_notifier_callback(struct notifier_block *nb, * avoid a runtime suspend interfering with evicted objects or backup * allocations. */ + xe_pm_block_end_signalling(); break; } case PM_POST_HIBERNATION: diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 59678b310e55..f7f89a18b6fc 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -33,6 +33,8 @@ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); bool xe_rpm_reclaim_safe(const struct xe_device *xe); struct task_struct *xe_pm_read_callback_task(struct xe_device *xe); +int xe_pm_block_on_suspend(struct xe_device *xe); +void xe_pm_might_block_on_suspend(void); int xe_pm_module_init(void); #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 0cacab20ff85..80b7f13ecd80 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -466,6 +466,8 @@ static void preempt_rebind_work_func(struct work_struct *w) retry: if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { up_write(&vm->lock); + /* We don't actually block but don't make progress. */ + xe_pm_might_block_on_suspend(); return; } -- cgit v1.2.3 From 2598d9b4208cafa46f6649e679cdba67fcf0436a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 22 Sep 2025 15:11:34 -0700 Subject: drm/xe/psmi: Do not return NULL The checks for id and bo_size are impossible conditions. If they were possible, then the caller should not be using IS_ERR(). Just replace them with asserts, which should be compiled out when not debugging, and at the same time prevent other refactors from breaking this assumption.
Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/aK1nZjyAF0s7bnHg@stanley.mountain Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250922221133.109921-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_psmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_psmi.c b/drivers/gpu/drm/xe/xe_psmi.c index 45d142191d60..6a54e38b81ba 100644 --- a/drivers/gpu/drm/xe/xe_psmi.c +++ b/drivers/gpu/drm/xe/xe_psmi.c @@ -70,8 +70,8 @@ static struct xe_bo *psmi_alloc_object(struct xe_device *xe, { struct xe_tile *tile; - if (!id || !bo_size) - return NULL; + xe_assert(xe, id); + xe_assert(xe, bo_size); tile = &xe->tiles[id - 1]; -- cgit v1.2.3 From 126d33f6711a4e8dc9613a146932e81a988b3dc2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 19 Sep 2025 18:04:29 +0200 Subject: drm/xe/debugfs: Make ggtt file per-tile Due to the initial lack of per-tile debugfs directories, the ggtt file attribute was created as a per-GT file. Fix that, since we now have proper per-tile directories. Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250919160430.573-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 13 ------------- drivers/gpu/drm/xe/xe_tile_debugfs.c | 7 +++++++ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index f253e2df4907..e4eba91cb83d 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -12,7 +12,6 @@ #include "xe_device.h" #include "xe_force_wake.h" -#include "xe_ggtt.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_gt_idle.h" @@ -142,17 +141,6 @@ static int steering(struct xe_gt *gt, struct drm_printer *p) return 0; } -static int ggtt(struct xe_gt *gt, struct drm_printer *p) -{ - int ret; - - xe_pm_runtime_get(gt_to_xe(gt)); - ret = xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - - return ret; -} - static int register_save_restore(struct xe_gt *gt, struct drm_printer *p) { struct xe_hw_engine *hwe; @@ -279,7 +267,6 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p) */ static const struct drm_info_list vf_safe_debugfs_list[] = { {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, - {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt}, {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, {"tunings", .show = xe_gt_debugfs_simple_show, .data = tunings}, diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.c b/drivers/gpu/drm/xe/xe_tile_debugfs.c index 5523874cba7b..a3f437d38f86 100644 --- a/drivers/gpu/drm/xe/xe_tile_debugfs.c +++ b/drivers/gpu/drm/xe/xe_tile_debugfs.c @@ -6,6 +6,7 @@ #include #include +#include "xe_ggtt.h" #include "xe_pm.h" #include "xe_sa.h" #include "xe_tile_debugfs.h" @@ -90,6 +91,11 @@ static int tile_debugfs_show_with_rpm(struct seq_file *m, void *data) return ret; } +static int ggtt(struct xe_tile *tile, struct drm_printer *p) +{ + return xe_ggtt_dump(tile->mem.ggtt, p); +} + static int sa_info(struct xe_tile *tile, struct drm_printer *p) { drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p, @@ -100,6 +106,7 @@ static int sa_info(struct xe_tile *tile, struct drm_printer *p) /* only for debugfs files which can be safely used on the VF */ static const struct drm_info_list
vf_safe_debugfs_list[] = { + { "ggtt", .show = tile_debugfs_show_with_rpm, .data = ggtt }, { "sa_info", .show = tile_debugfs_show_with_rpm, .data = sa_info }, }; -- cgit v1.2.3 From 0ab7747c2dcea1843dc54533122e577f96ec0223 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 19 Sep 2025 18:04:30 +0200 Subject: drm/xe/debugfs: Improve .show() helper for GT-based attributes Like we did for tile-based attributes, introduce a separate show() helper that implicitly takes an RPM reference prior to the call to the actual print() function. This translates into some savings in lines of code. Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250919160430.573-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 116 ++++++++++++++----------------------- drivers/gpu/drm/xe/xe_gt_debugfs.h | 1 + 2 files changed, 46 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index e4eba91cb83d..b9176d4398e1 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -35,6 +35,11 @@ #include "xe_uc_debugfs.h" #include "xe_wa.h" +static struct xe_gt *node_to_gt(struct drm_info_node *node) +{ + return node->dent->d_parent->d_inode->i_private; +} + /** * xe_gt_debugfs_simple_show - A show callback for struct drm_info_list * @m: the &seq_file * @data: data used by the drm debugfs helpers * * Return: 0 on success or a negative error code on failure. */ int xe_gt_debugfs_simple_show(struct seq_file *m, void *data) { struct drm_printer p = drm_seq_file_printer(m); struct drm_info_node *node = m->private; - struct dentry *parent = node->dent->d_parent; - struct xe_gt *gt = parent->d_inode->i_private; + struct xe_gt *gt = node_to_gt(node); int (*print)(struct xe_gt *, struct drm_printer *) = node->info_ent->data; if (WARN_ON(!print)) @@ -87,15 +91,36 @@ int xe_gt_debugfs_simple_show(struct seq_file *m, void *data) return print(gt, &p); } -static int hw_engines(struct xe_gt *gt, struct drm_printer *p) +/** + * xe_gt_debugfs_show_with_rpm - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * Similar to xe_gt_debugfs_simple_show() but implicitly takes an RPM ref. + * + * Return: 0 on success or a negative error code on failure.
+ */ +int xe_gt_debugfs_show_with_rpm(struct seq_file *m, void *data) { + struct drm_info_node *node = m->private; + struct xe_gt *gt = node_to_gt(node); struct xe_device *xe = gt_to_xe(gt); + int ret; + + xe_pm_runtime_get(xe); + ret = xe_gt_debugfs_simple_show(m, data); + xe_pm_runtime_put(xe); + + return ret; +} + +static int hw_engines(struct xe_gt *gt, struct drm_printer *p) +{ struct xe_hw_engine *hwe; enum xe_hw_engine_id id; unsigned int fw_ref; int ret = 0; - xe_pm_runtime_get(xe); fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { ret = -ETIMEDOUT; @@ -107,37 +132,19 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p) fw_put: xe_force_wake_put(gt_to_fw(gt), fw_ref); - xe_pm_runtime_put(xe); - - return ret; -} - -static int powergate_info(struct xe_gt *gt, struct drm_printer *p) -{ - int ret; - - xe_pm_runtime_get(gt_to_xe(gt)); - ret = xe_gt_idle_pg_print(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); return ret; } static int topology(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_topology_dump(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int steering(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_mcr_steering_dump(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } @@ -146,8 +153,6 @@ static int register_save_restore(struct xe_gt *gt, struct drm_printer *p) struct xe_hw_engine *hwe; enum xe_hw_engine_id id; - xe_pm_runtime_get(gt_to_xe(gt)); - xe_reg_sr_dump(>->reg_sr, p); drm_printf(p, "\n"); @@ -165,98 +170,66 @@ static int register_save_restore(struct xe_gt *gt, struct drm_printer *p) for_each_hw_engine(hwe, gt, id) xe_reg_whitelist_dump(&hwe->reg_whitelist, p); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int workarounds(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_wa_dump(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int tunings(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_tuning_dump(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int pat(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_pat_dump(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int mocs(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_mocs_dump(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_RENDER); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int ccs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COMPUTE); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int bcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COPY); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int vcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_DECODE); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_ENHANCE); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int 
hwconfig(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_guc_hwconfig_dump(>->uc.guc, p); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } @@ -266,25 +239,26 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p) * - without access to the PF specific data */ static const struct drm_info_list vf_safe_debugfs_list[] = { - {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, - {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, - {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, - {"tunings", .show = xe_gt_debugfs_simple_show, .data = tunings}, - {"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc}, - {"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc}, - {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, - {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, - {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, - {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, + { "topology", .show = xe_gt_debugfs_show_with_rpm, .data = topology }, + { "register-save-restore", + .show = xe_gt_debugfs_show_with_rpm, .data = register_save_restore }, + { "workarounds", .show = xe_gt_debugfs_show_with_rpm, .data = workarounds }, + { "tunings", .show = xe_gt_debugfs_show_with_rpm, .data = tunings }, + { "default_lrc_rcs", .show = xe_gt_debugfs_show_with_rpm, .data = rcs_default_lrc }, + { "default_lrc_ccs", .show = xe_gt_debugfs_show_with_rpm, .data = ccs_default_lrc }, + { "default_lrc_bcs", .show = xe_gt_debugfs_show_with_rpm, .data = bcs_default_lrc }, + { "default_lrc_vcs", .show = xe_gt_debugfs_show_with_rpm, .data = vcs_default_lrc }, + { "default_lrc_vecs", .show = xe_gt_debugfs_show_with_rpm, .data = vecs_default_lrc }, + { "hwconfig", .show = xe_gt_debugfs_show_with_rpm, .data = hwconfig }, }; /* everything else should be added here */ static const struct drm_info_list pf_only_debugfs_list[] = { - {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, - {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs}, - {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, - {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info}, - {"steering", .show = xe_gt_debugfs_simple_show, .data = steering}, + { "hw_engines", .show = xe_gt_debugfs_show_with_rpm, .data = hw_engines }, + { "mocs", .show = xe_gt_debugfs_show_with_rpm, .data = mocs }, + { "pat", .show = xe_gt_debugfs_show_with_rpm, .data = pat }, + { "powergate_info", .show = xe_gt_debugfs_show_with_rpm, .data = xe_gt_idle_pg_print }, + { "steering", .show = xe_gt_debugfs_show_with_rpm, .data = steering }, }; static ssize_t write_to_gt_call(const char __user *userbuf, size_t count, loff_t *ppos, diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h index 05a6cc93c78c..32ee3264051b 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.h +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h @@ -11,5 +11,6 @@ struct xe_gt; void xe_gt_debugfs_register(struct xe_gt *gt); int xe_gt_debugfs_simple_show(struct seq_file *m, void *data); +int xe_gt_debugfs_show_with_rpm(struct seq_file *m, void *data); #endif -- cgit v1.2.3 From 2de80e2da74b402a9d838b8e729cd01cf94cdcbc Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 22 Sep 2025 12:12:07 +0200 Subject: drm/xe/tests: Fix build break on clang 16.0.6 The following error was reported 
when building with clang 16.0.6: In file included from drivers/gpu/drm/xe/xe_pci.c:1104: >> drivers/gpu/drm/xe/tests/xe_pci.c:214:2: error: initializer \ element is not a compile-time constant graphics_ip_xelp, ^~~~~~~~~~~~~~~~ drivers/gpu/drm/xe/tests/xe_pci.c:221:2: error: initializer \ element is not a compile-time constant media_ip_xem, ^~~~~~~~~~~~ 2 errors generated. Fix that by explicitly re-defining the pre-GMDID IPs, as there are not that many of them. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202509192041.tQwdE4DS-lkp@intel.com/ Fixes: 5bb5258e357e ("drm/xe/tests: Add pre-GMDID IP descriptors to param generators") Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Cc: Nathan Chancellor Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250922101207.192028-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/tests/xe_pci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index aa29ac759d5d..0f136bc85b76 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -211,15 +211,15 @@ static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc) * param generator can be used for both */ static const struct xe_ip pre_gmdid_graphics_ips[] = { - graphics_ip_xelp, - graphics_ip_xelpp, - graphics_ip_xehpg, - graphics_ip_xehpc, + { 1200, "Xe_LP", &graphics_xelp }, + { 1210, "Xe_LP+", &graphics_xelp }, + { 1255, "Xe_HPG", &graphics_xehpg }, + { 1260, "Xe_HPC", &graphics_xehpc }, }; static const struct xe_ip pre_gmdid_media_ips[] = { - media_ip_xem, - media_ip_xehpm, + { 1200, "Xe_M", &media_xem }, + { 1255, "Xe_HPM", &media_xem }, }; KUNIT_ARRAY_PARAM(pre_gmdid_graphics_ip, pre_gmdid_graphics_ips, xe_ip_kunit_desc); -- cgit v1.2.3 From 4d0b035fd6dae8ee48e9c928b10f14877e595356 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 19 Sep 2025 13:20:53 +0100 Subject: drm/xe/uapi: loosen used tracking restriction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently this is hidden behind perfmon_capable() since this is technically an info leak, given that this is a system wide metric. However, the granularity reported here is always PAGE_SIZE aligned, which matches what the core kernel is already willing to expose to userspace if querying how many free RAM pages there are on the system, and that doesn't need any special privileges. In addition, other drm drivers seem happy to expose this. The motivation here is oneAPI, where they want to use the system-wide 'used' reporting rather than the per-client fdinfo stats. This has also come up with some perf overlay applications wanting this information.
Fixes: 1105ac15d2a1 ("drm/xe/uapi: restrict system wide accounting") Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Joshua Santosh Cc: José Roberto de Souza Cc: Matthew Brost Cc: Rodrigo Vivi Cc: # v6.8+ Acked-by: Rodrigo Vivi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250919122052.420979-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_query.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index e1b603aba61b..2e9ff33ed2fe 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -276,8 +276,7 @@ static int query_mem_regions(struct xe_device *xe, mem_regions->mem_regions[0].instance = 0; mem_regions->mem_regions[0].min_page_size = PAGE_SIZE; mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT; - if (perfmon_capable()) - mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man); + mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man); mem_regions->num_mem_regions = 1; for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { @@ -293,13 +292,11 @@ static int query_mem_regions(struct xe_device *xe, mem_regions->mem_regions[mem_regions->num_mem_regions].total_size = man->size; - if (perfmon_capable()) { - xe_ttm_vram_get_used(man, - &mem_regions->mem_regions - [mem_regions->num_mem_regions].used, - &mem_regions->mem_regions - [mem_regions->num_mem_regions].cpu_visible_used); - } + xe_ttm_vram_get_used(man, + &mem_regions->mem_regions + [mem_regions->num_mem_regions].used, + &mem_regions->mem_regions + [mem_regions->num_mem_regions].cpu_visible_used); mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size = xe_ttm_vram_get_cpu_visible_size(man); -- cgit v1.2.3 From dd797967160b79cc0ca2d2eb05fc55436b66dce0 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 24 Sep 2025 08:27:10 -0700 Subject: drm/xe/configfs: Fix engine class parsing If mask is NULL, only the engine class should be accepted, so the pattern string should be completely parsed. This should fix passing e.g. rcs0 to ctx_restore_post_bb when it's only expecting the engine class. Reported-by: Jonathan Cavitt Closes: https://lore.kernel.org/r/20250922155544.67712-1-jonathan.cavitt@intel.com Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/aNJKnrCQmL9xS9Gv@stanley.mountain Fixes: e2a9854d806e ("drm/xe/configfs: Allow to select by class only") Reviewed-by: Jonathan Cavitt Reviewed-by: Raag Jadav Link: https://lore.kernel.org/r/20250924152709.659483-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_configfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index 8a9b950e7a6d..beade13efbae 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -324,8 +324,8 @@ static const struct engine_info *lookup_engine_info(const char *pattern, u64 *ma continue; pattern += strlen(engine_info[i].cls); - if (!mask && !*pattern) - return &engine_info[i]; + if (!mask) + return *pattern ? NULL : &engine_info[i]; if (!strcmp(pattern, "*")) { *mask = engine_info[i].mask; -- cgit v1.2.3 From 47ca7acff4011fa322853a3612f464b959e88210 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 24 Sep 2025 08:27:11 -0700 Subject: drm/xe/configfs: Improve doc for ctx_restore* attributes Spell out the syntax instead of only using examples. 
Particularly important is the part about the batch buffer being shared, since that's different than engines_allowed and may confuse users: the same batch buffer is used for all engines of a certain class. Cc: Raag Jadav Reviewed-by: Raag Jadav Reviewed-by: Jonathan Cavitt Fixes: e2a9854d806e ("drm/xe/configfs: Allow to select by class only") Link: https://lore.kernel.org/r/20250924152709.659483-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_configfs.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index beade13efbae..139663423185 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -126,8 +126,20 @@ * not intended for normal execution and will taint the kernel with TAINT_TEST * when used. * - * Currently this is implemented only for post and mid context restore. - * Examples: + * The syntax allows passing straight instructions to be executed by the engine + * in a batch buffer, or setting specific registers. + * + * #. Generic instruction:: + * + * cmd [[dword0] [dword1] [...]] + * + * #. Simple register setting:: + * + * reg <address> <value>
+ * + * Commands are saved per engine class: all instances of that class will execute + * those commands during context switch. The instruction, dword arguments, + * addresses and values are in hex format like in the examples below. * * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f10 after the * normal context restore:: @@ -154,7 +166,8 @@ * When using multiple lines, make sure to use a command that is * implemented with a single write syscall, like HEREDOC. * - * These attributes can only be set before binding to the device. + * Currently this is implemented only for post and mid context restore and + * these attributes can only be set before binding to the device. * * Remove devices * ============== -- cgit v1.2.3 From 09ab20c41ace0cfc00e48338ab2e2c58896fa094 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 22 Sep 2025 12:58:29 -0700 Subject: drm/xe/device: Use poll_timeout_us() to wait for lmem Now that there's a generic poll_timeout_us(), use it to wait for LMEM_INIT in GU_CNTL. Reviewed-by: Maarten Lankhorst Link: https://lore.kernel.org/r/20250922-xe-iopoll-v4-1-06438311a63f@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 65 ++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index fdb7b7498920..09f8a66c9728 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -630,16 +631,22 @@ mask_err: return err; } -static bool verify_lmem_ready(struct xe_device *xe) +static int lmem_initializing(struct xe_device *xe) { - u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT; + if (xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT) + return 0; + + if (signal_pending(current)) + return -EINTR; - return !!val; + return 1; } static int wait_for_lmem_ready(struct xe_device *xe) { - unsigned long timeout, start; + const unsigned long TIMEOUT_SEC = 60; + unsigned long prev_jiffies; + int initializing; if (!IS_DGFX(xe)) return 0; @@ -647,39 +654,35 @@ static int wait_for_lmem_ready(struct xe_device *xe) if (IS_SRIOV_VF(xe)) return 0; - if (verify_lmem_ready(xe)) + if (!lmem_initializing(xe)) return 0; drm_dbg(&xe->drm, "Waiting for lmem initialization\n"); + prev_jiffies = jiffies; - start = jiffies; - timeout = start + secs_to_jiffies(60); /* 60 sec! */ - - do { - if (signal_pending(current)) - return -EINTR; - - /* - * The boot firmware initializes local memory and - * assesses its health. If memory training fails, - * the punit will have been instructed to keep the GT powered - * down.we won't be able to communicate with it - * - * If the status check is done before punit updates the register, - * it can lead to the system being unusable. - * use a timeout and defer the probe to prevent this. - */ - if (time_after(jiffies, timeout)) { - drm_dbg(&xe->drm, "lmem not initialized by firmware\n"); - return -EPROBE_DEFER; - } - - msleep(20); - - } while (!verify_lmem_ready(xe)); + /* + * The boot firmware initializes local memory and + * assesses its health. If memory training fails, + * the punit will have been instructed to keep the GT powered + * down.we won't be able to communicate with it + * + * If the status check is done before punit updates the register, + * it can lead to the system being unusable. + * use a timeout and defer the probe to prevent this. 
+ */ + poll_timeout_us(initializing = lmem_initializing(xe), + initializing <= 0, + 20 * USEC_PER_MSEC, TIMEOUT_SEC * USEC_PER_SEC, true); + if (initializing < 0) + return initializing; + + if (initializing) { + drm_dbg(&xe->drm, "lmem not initialized by firmware\n"); + return -EPROBE_DEFER; + } drm_dbg(&xe->drm, "lmem ready after %ums", - jiffies_to_msecs(jiffies - start)); + jiffies_to_msecs(jiffies - prev_jiffies)); return 0; } -- cgit v1.2.3 From b0ac4ef074ace20714a3bf96d23ae2ba61cc4439 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 22 Sep 2025 12:58:30 -0700 Subject: drm/xe/guc_pc: Use poll_timeout_us() for waiting Convert wait_for_pc_state() and wait_for_act_freq_limit() to poll_timeout_us(). This brings two changes in behavior: it drops the exponential wait and fixes a potentially much longer sleep. usleep_range() will wait anywhere between `wait` and `wait << 1`, so it's not correct to assume `slept += wait`; paired with the exponential wait increase, the code could end up waiting much longer than intended. Reviewed-by: Vinay Belgaumkar Link: https://lore.kernel.org/r/20250922-xe-iopoll-v4-2-06438311a63f@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_pc.c | 44 +++++++++++++----------------------------- 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 53fdf59524c4..3c0feb50a1e2 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -130,26 +131,16 @@ static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc) FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count)) static int wait_for_pc_state(struct xe_guc_pc *pc, - enum slpc_global_state state, + enum slpc_global_state target_state, int timeout_ms) { - int timeout_us = 1000 * timeout_ms; - int slept, wait = 10; + enum slpc_global_state state; xe_device_assert_mem_access(pc_to_xe(pc)); - for (slept = 0; slept < timeout_us;) { - if (slpc_shared_data_read(pc, header.global_state) == state) - return 0; - - usleep_range(wait, wait << 1); - slept += wait; - wait <<= 1; - if (slept + wait > timeout_us) - wait = timeout_us - slept; - } - - return -ETIMEDOUT; + return poll_timeout_us(state = slpc_shared_data_read(pc, header.global_state), + state == target_state, + 20, timeout_ms * USEC_PER_MSEC, false); } static int wait_for_flush_complete(struct xe_guc_pc *pc) @@ -164,24 +155,15 @@ static int wait_for_flush_complete(struct xe_guc_pc *pc) return 0; } -static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq) +static int wait_for_act_freq_max_limit(struct xe_guc_pc *pc, u32 max_limit) { - int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC; - int slept, wait = 10; - - for (slept = 0; slept < timeout_us;) { - if (xe_guc_pc_get_act_freq(pc) <= freq) - return 0; - - usleep_range(wait, wait << 1); - slept += wait; - wait <<= 1; - if (slept + wait > timeout_us) - wait = timeout_us - slept; - } + u32 freq; - return -ETIMEDOUT; + return poll_timeout_us(freq = xe_guc_pc_get_act_freq(pc), + freq <= max_limit, + 20, SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC, false); } + static int pc_action_reset(struct xe_guc_pc *pc) { struct xe_guc_ct *ct = pc_to_ct(pc); @@ -983,7 +965,7 @@ void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) * Wait for actual freq to go below the flush cap: even if the previous * max was below cap, the current one might still be above it */ - ret =
wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); + ret = wait_for_act_freq_max_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); if (ret) xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); -- cgit v1.2.3 From 2a16f47dcc75e7bfd98291f0951c5e864294341e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 22 Sep 2025 12:58:31 -0700 Subject: drm/xe/guc: Drop helper to read freq As the forcewake is already held during GuC load, there's no need to use a helper function to call xe_guc_pc_get_cur_freq(). Just call xe_guc_pc_get_cur_freq_fw() directly. Suggested-by: Maarten Lankhorst Reviewed-by: Maarten Lankhorst Link: https://lore.kernel.org/r/20250922-xe-iopoll-v4-3-06438311a63f@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 00789844ea4d..708e08fb0779 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1017,14 +1017,6 @@ static int guc_load_done(u32 status) return 0; } -static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc) -{ - u32 freq; - int ret = xe_guc_pc_get_cur_freq(guc_pc, &freq); - - return ret ? ret : freq; -} - /* * Wait for the GuC to start up. * @@ -1102,7 +1094,7 @@ static int guc_wait_ucode(struct xe_guc *guc) xe_gt_dbg(gt, "load still in progress, timeouts = %d, freq = %dMHz (req %dMHz), status = 0x%08X [0x%02X/%02X]\n", count, xe_guc_pc_get_act_freq(guc_pc), - guc_pc_get_cur_freq(guc_pc), status, + xe_guc_pc_get_cur_freq_fw(guc_pc), status, REG_FIELD_GET(GS_BOOTROM_MASK, status), REG_FIELD_GET(GS_UKERNEL_MASK, status)); } while (1); @@ -1113,7 +1105,7 @@ static int guc_wait_ucode(struct xe_guc *guc) xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz), done = %d\n", status, delta_ms, xe_guc_pc_get_act_freq(guc_pc), - guc_pc_get_cur_freq(guc_pc), load_done); + xe_guc_pc_get_cur_freq_fw(guc_pc), load_done); xe_gt_err(gt, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", REG_FIELD_GET(GS_MIA_IN_RESET, status), bootrom, ukernel, @@ -1167,11 +1159,11 @@ static int guc_wait_ucode(struct xe_guc *guc) xe_gt_warn(gt, "excessive init time: %lldms! [status = 0x%08X, timeouts = %d]\n", delta_ms, status, count); xe_gt_warn(gt, "excessive init time: [freq = %dMHz (req = %dMHz), before = %dMHz, perf_limit_reasons = 0x%08X]\n", - xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc), + xe_guc_pc_get_act_freq(guc_pc), xe_guc_pc_get_cur_freq_fw(guc_pc), before_freq, xe_gt_throttle_get_limit_reasons(gt)); } else { xe_gt_dbg(gt, "init took %lldms, freq = %dMHz (req = %dMHz), before = %dMHz, status = 0x%08X, timeouts = %d\n", - delta_ms, xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc), + delta_ms, xe_guc_pc_get_act_freq(guc_pc), xe_guc_pc_get_cur_freq_fw(guc_pc), before_freq, status, count); } -- cgit v1.2.3 From abde96d8442de131fc3af647fdc63a0877dc99b6 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 22 Sep 2025 12:58:32 -0700 Subject: drm/xe/guc: Extract function to print load error Move the error parsing and printing out of guc_wait_ucode() into a helper to clean up the wait function. Since the `load_done != 1` condition now has a return statement, also simplify the if/else chain.
Reviewed-by: John Harrison # v2 Link: https://lore.kernel.org/r/20250922-xe-iopoll-v4-4-06438311a63f@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc.c | 103 ++++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 708e08fb0779..618a005f3bcc 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1047,6 +1047,54 @@ static int guc_load_done(u32 status) #endif #define GUC_LOAD_TIME_WARN_MS 200 +static void print_load_status_err(struct xe_gt *gt, u32 status) +{ + struct xe_mmio *mmio = >->mmio; + u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status); + u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status); + + xe_gt_err(gt, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + bootrom, ukernel, + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + + switch (bootrom) { + case XE_BOOTROM_STATUS_NO_KEY_FOUND: + xe_gt_err(gt, "invalid key requested, header = 0x%08X\n", + xe_mmio_read32(mmio, GUC_HEADER_INFO)); + break; + case XE_BOOTROM_STATUS_RSA_FAILED: + xe_gt_err(gt, "firmware signature verification failed\n"); + break; + case XE_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE: + xe_gt_err(gt, "firmware production part check failure\n"); + break; + } + + switch (ukernel) { + case XE_GUC_LOAD_STATUS_HWCONFIG_START: + xe_gt_err(gt, "still extracting hwconfig table.\n"); + break; + case XE_GUC_LOAD_STATUS_EXCEPTION: + xe_gt_err(gt, "firmware exception. EIP: %#x\n", + xe_mmio_read32(mmio, SOFT_SCRATCH(13))); + break; + case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID: + xe_gt_err(gt, "illegal init/ADS data\n"); + break; + case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: + xe_gt_err(gt, "illegal register in save/restore workaround list\n"); + break; + case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR: + xe_gt_err(gt, "illegal workaround KLV data\n"); + break; + case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG: + xe_gt_err(gt, "illegal feature flag specified\n"); + break; + } +} + static int guc_wait_ucode(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); @@ -1100,62 +1148,15 @@ static int guc_wait_ucode(struct xe_guc *guc) } while (1); if (load_done != 1) { - u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status); - u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status); - xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz), done = %d\n", status, delta_ms, xe_guc_pc_get_act_freq(guc_pc), xe_guc_pc_get_cur_freq_fw(guc_pc), load_done); - xe_gt_err(gt, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", - REG_FIELD_GET(GS_MIA_IN_RESET, status), - bootrom, ukernel, - REG_FIELD_GET(GS_MIA_MASK, status), - REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); - - switch (bootrom) { - case XE_BOOTROM_STATUS_NO_KEY_FOUND: - xe_gt_err(gt, "invalid key requested, header = 0x%08X\n", - xe_mmio_read32(mmio, GUC_HEADER_INFO)); - break; - - case XE_BOOTROM_STATUS_RSA_FAILED: - xe_gt_err(gt, "firmware signature verification failed\n"); - break; - - case XE_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE: - xe_gt_err(gt, "firmware production part check failure\n"); - break; - } - - switch (ukernel) { - case XE_GUC_LOAD_STATUS_HWCONFIG_START: - xe_gt_err(gt, "still extracting hwconfig table.\n"); - break; - - case XE_GUC_LOAD_STATUS_EXCEPTION: - xe_gt_err(gt, "firmware exception. 
EIP: %#x\n", - xe_mmio_read32(mmio, SOFT_SCRATCH(13))); - break; - - case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID: - xe_gt_err(gt, "illegal init/ADS data\n"); - break; - - case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: - xe_gt_err(gt, "illegal register in save/restore workaround list\n"); - break; - - case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR: - xe_gt_err(gt, "illegal workaround KLV data\n"); - break; - - case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG: - xe_gt_err(gt, "illegal feature flag specified\n"); - break; - } + print_load_status_err(gt, status); return -EPROTO; - } else if (delta_ms > GUC_LOAD_TIME_WARN_MS) { + } + + if (delta_ms > GUC_LOAD_TIME_WARN_MS) { xe_gt_warn(gt, "excessive init time: %lldms! [status = 0x%08X, timeouts = %d]\n", delta_ms, status, count); xe_gt_warn(gt, "excessive init time: [freq = %dMHz (req = %dMHz), before = %dMHz, perf_limit_reasons = 0x%08X]\n", -- cgit v1.2.3 From a4916b4da44812384388ac09a2baf9da461dbc30 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 22 Sep 2025 12:58:33 -0700 Subject: drm/xe/guc: Refactor GuC load to use poll_timeout_us() Currently there are 2 wait loops for loading GuC: one in xe_mmio_wait32_not() and one in guc_wait_ucode(). Now that there's a generic poll_timeout_us(), refactor the code to use that to be more readable. The main change in behavior is that there's no exponential wait anymore; it is now replaced by a 10msec retry. Reviewed-by: John Harrison Link: https://lore.kernel.org/r/20250922-xe-iopoll-v4-5-06438311a63f@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc.c | 210 +++++++++++++++++++------------------------- 1 file changed, 92 insertions(+), 118 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 618a005f3bcc..d5adbbb013ec 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -5,6 +5,7 @@ #include "xe_guc.h" +#include #include #include @@ -970,82 +971,27 @@ static int guc_xfer_rsa(struct xe_guc *guc) return 0; } -/* - * Check a previously read GuC status register (GUC_STATUS) looking for - * known terminal states (either completion or failure) of either the - * microkernel status field or the boot ROM status field. Returns +1 for - * successful completion, -1 for failure and 0 for any intermediate state.
- */ -static int guc_load_done(u32 status) -{ - u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, status); - u32 br_val = REG_FIELD_GET(GS_BOOTROM_MASK, status); - - switch (uk_val) { - case XE_GUC_LOAD_STATUS_READY: - return 1; - - case XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH: - case XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH: - case XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE: - case XE_GUC_LOAD_STATUS_HWCONFIG_ERROR: - case XE_GUC_LOAD_STATUS_BOOTROM_VERSION_MISMATCH: - case XE_GUC_LOAD_STATUS_DPC_ERROR: - case XE_GUC_LOAD_STATUS_EXCEPTION: - case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID: - case XE_GUC_LOAD_STATUS_MPU_DATA_INVALID: - case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: - case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR: - case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG: - return -1; - } - - switch (br_val) { - case XE_BOOTROM_STATUS_NO_KEY_FOUND: - case XE_BOOTROM_STATUS_RSA_FAILED: - case XE_BOOTROM_STATUS_PAVPC_FAILED: - case XE_BOOTROM_STATUS_WOPCM_FAILED: - case XE_BOOTROM_STATUS_LOADLOC_FAILED: - case XE_BOOTROM_STATUS_JUMP_FAILED: - case XE_BOOTROM_STATUS_RC6CTXCONFIG_FAILED: - case XE_BOOTROM_STATUS_MPUMAP_INCORRECT: - case XE_BOOTROM_STATUS_EXCEPTION: - case XE_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE: - return -1; - } - - return 0; -} - /* * Wait for the GuC to start up. * * Measurements indicate this should take no more than 20ms (assuming the GT * clock is at maximum frequency). However, thermal throttling and other issues * can prevent the clock hitting max and thus making the load take significantly - * longer. Allow up to 200ms as a safety margin for real world worst case situations. - * - * However, bugs anywhere from KMD to GuC to PCODE to fan failure in a CI farm can - * lead to even longer times. E.g. if the GT is clamped to minimum frequency then - * the load times can be in the seconds range. So the timeout is increased for debug - * builds to ensure that problems can be correctly analysed. For release builds, the - * timeout is kept short so that users don't wait forever to find out that there is a - * problem. In either case, if the load took longer than is reasonable even with some - * 'sensible' throttling, then flag a warning because something is not right. + * longer. Allow up to 3s as a safety margin in normal builds. For + * CONFIG_DRM_XE_DEBUG allow up to 10s to account for slower execution, issues + * in PCODE, driver, fan, etc. * - * Note that there is a limit on how long an individual usleep_range() can wait for, - * hence longer waits require wrapping a shorter wait in a loop. - * - * Note that the only reason an end user should hit the shorter timeout is in case of - * extreme thermal throttling. And a system that is that hot during boot is probably - * dead anyway! + * Keep checking the GUC_STATUS every 10ms with a debug message every 100 + * attempts as a "I'm slow, but alive" message. Regardless, if it takes more + * than 200ms, emit a warning. */ + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) -#define GUC_LOAD_RETRY_LIMIT 20 +#define GUC_LOAD_TIMEOUT_SEC 20 #else -#define GUC_LOAD_RETRY_LIMIT 3 +#define GUC_LOAD_TIMEOUT_SEC 3 #endif -#define GUC_LOAD_TIME_WARN_MS 200 +#define GUC_LOAD_TIME_WARN_MSEC 200 static void print_load_status_err(struct xe_gt *gt, u32 status) { @@ -1095,77 +1041,105 @@ static void print_load_status_err(struct xe_gt *gt, u32 status) } } +/* + * Check GUC_STATUS looking for known terminal states (either completion or + * failure) of either the microkernel status field or the boot ROM status field. 
+ * + * Returns 1 for successful completion, -1 for failure and 0 for any + * intermediate state. + */ +static int guc_load_done(struct xe_gt *gt, u32 *status, u32 *tries) +{ + u32 ukernel, bootrom; + + *status = xe_mmio_read32(>->mmio, GUC_STATUS); + ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, *status); + bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, *status); + + switch (ukernel) { + case XE_GUC_LOAD_STATUS_READY: + return 1; + case XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH: + case XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH: + case XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE: + case XE_GUC_LOAD_STATUS_HWCONFIG_ERROR: + case XE_GUC_LOAD_STATUS_BOOTROM_VERSION_MISMATCH: + case XE_GUC_LOAD_STATUS_DPC_ERROR: + case XE_GUC_LOAD_STATUS_EXCEPTION: + case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID: + case XE_GUC_LOAD_STATUS_MPU_DATA_INVALID: + case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: + case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR: + case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG: + return -1; + } + + switch (bootrom) { + case XE_BOOTROM_STATUS_NO_KEY_FOUND: + case XE_BOOTROM_STATUS_RSA_FAILED: + case XE_BOOTROM_STATUS_PAVPC_FAILED: + case XE_BOOTROM_STATUS_WOPCM_FAILED: + case XE_BOOTROM_STATUS_LOADLOC_FAILED: + case XE_BOOTROM_STATUS_JUMP_FAILED: + case XE_BOOTROM_STATUS_RC6CTXCONFIG_FAILED: + case XE_BOOTROM_STATUS_MPUMAP_INCORRECT: + case XE_BOOTROM_STATUS_EXCEPTION: + case XE_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE: + return -1; + } + + if (++*tries >= 100) { + struct xe_guc_pc *guc_pc = >->uc.guc.pc; + + *tries = 0; + xe_gt_dbg(gt, "GuC load still in progress, freq = %dMHz (req %dMHz), status = 0x%08X [0x%02X/%02X]\n", + xe_guc_pc_get_act_freq(guc_pc), + xe_guc_pc_get_cur_freq_fw(guc_pc), + *status, ukernel, bootrom); + } + + return 0; +} + static int guc_wait_ucode(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); - struct xe_mmio *mmio = >->mmio; struct xe_guc_pc *guc_pc = >->uc.guc.pc; - ktime_t before, after, delta; - int load_done; - u32 status = 0; - int count = 0; + u32 before_freq, act_freq, cur_freq; + u32 status = 0, tries = 0; + ktime_t before; u64 delta_ms; - u32 before_freq; + int ret; before_freq = xe_guc_pc_get_act_freq(guc_pc); before = ktime_get(); - /* - * Note, can't use any kind of timing information from the call to xe_mmio_wait. - * It could return a thousand intermediate stages at random times. Instead, must - * manually track the total time taken and locally implement the timeout. - */ - do { - u32 last_status = status & (GS_UKERNEL_MASK | GS_BOOTROM_MASK); - int ret; - /* - * Wait for any change (intermediate or terminal) in the status register. - * Note, the return value is a don't care. The only failure code is timeout - * but the timeouts need to be accumulated over all the intermediate partial - * timeouts rather than allowing a huge timeout each time. So basically, need - * to treat a timeout no different to a value change. 
- */ - ret = xe_mmio_wait32_not(mmio, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK, - last_status, 1000 * 1000, &status, false); - if (ret < 0) - count++; - after = ktime_get(); - delta = ktime_sub(after, before); - delta_ms = ktime_to_ms(delta); - - load_done = guc_load_done(status); - if (load_done != 0) - break; + ret = poll_timeout_us(ret = guc_load_done(gt, &status, &tries), ret, + 10 * USEC_PER_MSEC, + GUC_LOAD_TIMEOUT_SEC * USEC_PER_SEC, false); - if (delta_ms >= (GUC_LOAD_RETRY_LIMIT * 1000)) - break; - - xe_gt_dbg(gt, "load still in progress, timeouts = %d, freq = %dMHz (req %dMHz), status = 0x%08X [0x%02X/%02X]\n", - count, xe_guc_pc_get_act_freq(guc_pc), - xe_guc_pc_get_cur_freq_fw(guc_pc), status, - REG_FIELD_GET(GS_BOOTROM_MASK, status), - REG_FIELD_GET(GS_UKERNEL_MASK, status)); - } while (1); + delta_ms = ktime_to_ms(ktime_sub(ktime_get(), before)); + act_freq = xe_guc_pc_get_act_freq(guc_pc); + cur_freq = xe_guc_pc_get_cur_freq_fw(guc_pc); - if (load_done != 1) { - xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz), done = %d\n", + if (ret) { + xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz)\n", status, delta_ms, xe_guc_pc_get_act_freq(guc_pc), - xe_guc_pc_get_cur_freq_fw(guc_pc), load_done); + xe_guc_pc_get_cur_freq_fw(guc_pc)); print_load_status_err(gt, status); return -EPROTO; } - if (delta_ms > GUC_LOAD_TIME_WARN_MS) { - xe_gt_warn(gt, "excessive init time: %lldms! [status = 0x%08X, timeouts = %d]\n", - delta_ms, status, count); - xe_gt_warn(gt, "excessive init time: [freq = %dMHz (req = %dMHz), before = %dMHz, perf_limit_reasons = 0x%08X]\n", - xe_guc_pc_get_act_freq(guc_pc), xe_guc_pc_get_cur_freq_fw(guc_pc), - before_freq, xe_gt_throttle_get_limit_reasons(gt)); + if (delta_ms > GUC_LOAD_TIME_WARN_MSEC) { + xe_gt_warn(gt, "GuC load: excessive init time: %lldms! [status = 0x%08X]\n", + delta_ms, status); + xe_gt_warn(gt, "GuC load: excessive init time: [freq = %dMHz (req = %dMHz), before = %dMHz, perf_limit_reasons = 0x%08X]\n", + act_freq, cur_freq, before_freq, + xe_gt_throttle_get_limit_reasons(gt)); } else { - xe_gt_dbg(gt, "init took %lldms, freq = %dMHz (req = %dMHz), before = %dMHz, status = 0x%08X, timeouts = %d\n", - delta_ms, xe_guc_pc_get_act_freq(guc_pc), xe_guc_pc_get_cur_freq_fw(guc_pc), - before_freq, status, count); + xe_gt_dbg(gt, "GuC load: init took %lldms, freq = %dMHz (req = %dMHz), before = %dMHz, status = 0x%08X\n", + delta_ms, act_freq, cur_freq, before_freq, status); } return 0; -- cgit v1.2.3 From 662d98b8b373007fa1b08ba93fee11f6fd3e387c Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 25 Sep 2025 02:31:46 +0000 Subject: drm/xe/hw_engine_group: Fix double write lock release in error path In xe_hw_engine_group_get_mode(), a write lock is acquired before calling switch_mode(), which in turn invokes xe_hw_engine_group_suspend_faulting_lr_jobs(). On failure inside xe_hw_engine_group_suspend_faulting_lr_jobs(), the write lock is released there, and then again in xe_hw_engine_group_get_mode(), leading to a double release. Fix this by keeping both the acquire and release operations in xe_hw_engine_group_get_mode().
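The rule the fix restores is that the semaphore is released only by the function that acquired it; a minimal sketch of that ownership pattern, with hypothetical names standing in for the actual xe helpers:

	static int get_mode(struct group *grp)
	{
		int err;

		down_write(&grp->mode_sem);	/* lock taken here... */

		err = do_switch(grp);		/* callees report errors, never unlock */
		if (err) {
			up_write(&grp->mode_sem); /* ...so it is dropped here, exactly once */
			return err;
		}

		/* on success, the lock is held until the matching put_mode() */
		return 0;
	}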
Fixes: 770bd1d34113 ("drm/xe/hw_engine_group: Ensure safe transition between execution modes") Cc: Francois Dugast Signed-off-by: Shuicheng Lin Reviewed-by: Francois Dugast Link: https://lore.kernel.org/r/20250925023145.1203004-2-shuicheng.lin@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine_group.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 58bee3ffe881..fa4db5f23342 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -213,17 +213,13 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group err = q->ops->suspend_wait(q); if (err) - goto err_suspend; + return err; } if (need_resume) xe_hw_engine_group_resume_faulting_lr_jobs(group); return 0; - -err_suspend: - up_write(&group->mode_sem); - return err; } /** -- cgit v1.2.3 From b88bb1eefa88f0cefc00fe5e78b1186cd8f9db78 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 28 Sep 2025 19:48:10 +0200 Subject: drm/xe/vf: Rename sriov_update_device_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a VF-only function and its name should reflect that to avoid any confusion. Move the VF check to the caller side. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20250928174811.198933-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_device.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 09f8a66c9728..0421042597a2 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -688,16 +688,15 @@ static int wait_for_lmem_ready(struct xe_device *xe) } ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */ -static void sriov_update_device_info(struct xe_device *xe) +static void vf_update_device_info(struct xe_device *xe) { + xe_assert(xe, IS_SRIOV_VF(xe)); /* disable features that are not available/applicable to VFs */ - if (IS_SRIOV_VF(xe)) { - xe->info.probe_display = 0; - xe->info.has_heci_cscfi = 0; - xe->info.has_heci_gscfi = 0; - xe->info.skip_guc_pc = 1; - xe->info.skip_pcode = 1; - } + xe->info.probe_display = 0; + xe->info.has_heci_cscfi = 0; + xe->info.has_heci_gscfi = 0; + xe->info.skip_guc_pc = 1; + xe->info.skip_pcode = 1; } static int xe_device_vram_alloc(struct xe_device *xe) @@ -738,7 +737,8 @@ int xe_device_probe_early(struct xe_device *xe) xe_sriov_probe_early(xe); - sriov_update_device_info(xe); + if (IS_SRIOV_VF(xe)) + vf_update_device_info(xe); err = xe_pcode_probe_early(xe); if (err || xe_survivability_mode_is_requested(xe)) { -- cgit v1.2.3 From e35e288090f362be88d77b60d9846cea15df173e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 28 Sep 2025 19:48:11 +0200 Subject: drm/xe/vf: Don't claim support for firmware late-bind if VF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In general, VFs can't load firmware, so attempting to initialize the firmware late-bind component leads to errors like: [] xe 0000:03:00.1: [drm] *ERROR* Late bind component not bound Fixes: 918bd789d62e ("drm/xe/xe_late_bind_fw: Introduce xe_late_bind_fw") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6190 Signed-off-by: Michal Wajdeczko Cc: Badal Nilawar Reviewed-by: Piotr Piórkowski Reviewed-by: Badal Nilawar Link:
https://lore.kernel.org/r/20250928174811.198933-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 0421042597a2..386940323630 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -695,6 +695,7 @@ static void vf_update_device_info(struct xe_device *xe) xe->info.probe_display = 0; xe->info.has_heci_cscfi = 0; xe->info.has_heci_gscfi = 0; + xe->info.has_late_bind = 0; xe->info.skip_guc_pc = 1; xe->info.skip_pcode = 1; } -- cgit v1.2.3 From 1238b84ea305d5bb891761f3bd5f58763e2a6778 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 28 Sep 2025 16:00:23 +0200 Subject: drm/xe/pf: Promote PF debugfs function to its own file In upcoming patches, we will build a separate debugfs tree on the PF for all SR-IOV related files, and this new code will need a dedicated file. To minimize large diffs later, move the existing function now as-is, so any future modifications are done directly in the target file. Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Cc: Rodrigo Vivi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250928140029.198847-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_sriov_pf.c | 42 ------------------------- drivers/gpu/drm/xe/xe_sriov_pf.h | 5 --- drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c | 54 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h | 18 +++++++++++ 6 files changed, 74 insertions(+), 48 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index d9c6cf0f189e..534355c23e80 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -174,6 +174,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt_ml.o \ xe_pci_sriov.o \ xe_sriov_pf.o \ + xe_sriov_pf_debugfs.o \ xe_sriov_pf_service.o # include helpers for tests even when XE is built-in ifdef CONFIG_DRM_XE_KUNIT_TEST diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index cd977dbd1ef6..dba0a9c4a4d2 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -23,7 +23,7 @@ #include "xe_psmi.h" #include "xe_pxp_debugfs.h" #include "xe_sriov.h" -#include "xe_sriov_pf.h" +#include "xe_sriov_pf_debugfs.h" #include "xe_sriov_vf.h" #include "xe_step.h" #include "xe_tile_debugfs.h" diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 27ddf3cc80e9..4698348c010a 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -146,45 +146,3 @@ void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p) drm_printf(p, "supported: %u\n", xe->sriov.pf.driver_max_vfs); drm_printf(p, "enabled: %u\n", pci_num_vf(pdev)); } - -static int simple_show(struct seq_file *m, void *data) -{ - struct drm_printer p = drm_seq_file_printer(m); - struct drm_info_node *node = m->private; - struct dentry *parent = node->dent->d_parent; - struct xe_device *xe = parent->d_inode->i_private; - void (*print)(struct xe_device *, struct drm_printer *) = node->info_ent->data; - - print(xe, &p); - return 0; -} - -static const struct drm_info_list debugfs_list[] = { - { .name = "vfs", .show = simple_show, .data = xe_sriov_pf_print_vfs_summary }, - { .name = "versions", .show = simple_show, .data = xe_sriov_pf_service_print_versions }, -};
-/** - * xe_sriov_pf_debugfs_register - Register PF debugfs attributes. - * @xe: the &xe_device - * @root: the root &dentry - * - * Prepare debugfs attributes exposed by the PF. - */ -void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) -{ - struct drm_minor *minor = xe->drm.primary; - struct dentry *parent; - - /* - * /sys/kernel/debug/dri/0/ - * ├── pf - * │   ├── ... - */ - parent = debugfs_create_dir("pf", root); - if (IS_ERR(parent)) - return; - parent->d_inode->i_private = xe; - - drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), parent, minor); -} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h index e3b34f8f5e04..e6cf930ecde4 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -16,7 +16,6 @@ struct xe_device; bool xe_sriov_pf_readiness(struct xe_device *xe); int xe_sriov_pf_init_early(struct xe_device *xe); int xe_sriov_pf_wait_ready(struct xe_device *xe); -void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root); void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); #else static inline bool xe_sriov_pf_readiness(struct xe_device *xe) @@ -28,10 +27,6 @@ static inline int xe_sriov_pf_init_early(struct xe_device *xe) { return 0; } - -static inline void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) -{ -} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c new file mode 100644 index 000000000000..2a1316048439 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include +#include + +#include "xe_device_types.h" +#include "xe_sriov_pf.h" +#include "xe_sriov_pf_debugfs.h" +#include "xe_sriov_pf_service.h" + +static int simple_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct xe_device *xe = parent->d_inode->i_private; + void (*print)(struct xe_device *, struct drm_printer *) = node->info_ent->data; + + print(xe, &p); + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + { .name = "vfs", .show = simple_show, .data = xe_sriov_pf_print_vfs_summary }, + { .name = "versions", .show = simple_show, .data = xe_sriov_pf_service_print_versions }, +}; + +/** + * xe_sriov_pf_debugfs_register - Register PF debugfs attributes. + * @xe: the &xe_device + * @root: the root &dentry + * + * Prepare debugfs attributes exposed by the PF. + */ +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ + struct drm_minor *minor = xe->drm.primary; + struct dentry *parent; + + /* + * /sys/kernel/debug/dri/0/ + * ├── pf + * │ ├── ... 
+ */ + parent = debugfs_create_dir("pf", root); + if (IS_ERR(parent)) + return; + parent->d_inode->i_private = xe; + + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h new file mode 100644 index 000000000000..93db13585b82 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_DEBUGFS_H_ +#define _XE_SRIOV_PF_DEBUGFS_H_ + +struct dentry; +struct xe_device; + +#ifdef CONFIG_PCI_IOV +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root); +#else +static inline void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) { } +#endif + +#endif -- cgit v1.2.3 From 4d4af0d6cbbfaf90ade642b29e50c745906c1187 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 28 Sep 2025 16:00:24 +0200 Subject: drm/xe/pf: Create separate debugfs tree for SR-IOV files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we expose debugfs files related to SR-IOV functions together with other native files, but that approach will not scale well as we plan to add more attributes and also expose some of them on the per-tile basis. Start building separate tree for SR-IOV specific debugfs files where we can replicate similar files per every SR-IOV function: /sys/kernel/debug/dri/BDF/ ├── sriov │ ├── pf │ │ ├── tile0 │ │ │ ├── gt0 │ │ │ ├── gt1 │ │ │ : │ │ ├── tile1 │ │ : │ ├── vf1 │ │ ├── tile0 │ │ │ ├── gt0 │ │ │ ├── gt1 │ │ │ : │ │ : │ ├── vf2 │ ├── ... We will populate this new tree in upcoming patches. Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Cc: Rodrigo Vivi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250928140029.198847-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c | 58 +++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c index 2a1316048439..37cc3a297667 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c @@ -9,7 +9,9 @@ #include "xe_device_types.h" #include "xe_sriov_pf.h" #include "xe_sriov_pf_debugfs.h" +#include "xe_sriov_pf_helpers.h" #include "xe_sriov_pf_service.h" +#include "xe_sriov_printk.h" static int simple_show(struct seq_file *m, void *data) { @@ -28,27 +30,65 @@ static const struct drm_info_list debugfs_list[] = { { .name = "versions", .show = simple_show, .data = xe_sriov_pf_service_print_versions }, }; +static void pf_populate_pf(struct xe_device *xe, struct dentry *pfdent) +{ + struct drm_minor *minor = xe->drm.primary; + + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), pfdent, minor); +} + /** * xe_sriov_pf_debugfs_register - Register PF debugfs attributes. * @xe: the &xe_device * @root: the root &dentry * - * Prepare debugfs attributes exposed by the PF. + * Create separate directory that will contain all SR-IOV related files, + * organized per each SR-IOV function (PF, VF1, VF2, ..., VFn). 
*/ void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) { - struct drm_minor *minor = xe->drm.primary; - struct dentry *parent; + int totalvfs = xe_sriov_pf_get_totalvfs(xe); + struct dentry *pfdent; + struct dentry *vfdent; + struct dentry *dent; + char vfname[16]; /* should be more than enough for "vf%u\0" and VFID(UINT_MAX) */ + unsigned int n; /* - * /sys/kernel/debug/dri/0/ - * ├── pf + * /sys/kernel/debug/dri/BDF/ + * ├── sriov # d_inode->i_private = (xe_device*) * │ ├── ... */ - parent = debugfs_create_dir("pf", root); - if (IS_ERR(parent)) + dent = debugfs_create_dir("sriov", root); + if (IS_ERR(dent)) + return; + dent->d_inode->i_private = xe; + + /* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov # d_inode->i_private = (xe_device*) + * │ ├── pf # d_inode->i_private = (xe_device*) + * │ │ ├── ... + */ + pfdent = debugfs_create_dir("pf", dent); + if (IS_ERR(pfdent)) return; - parent->d_inode->i_private = xe; + pfdent->d_inode->i_private = xe; + + pf_populate_pf(xe, pfdent); - drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), parent, minor); + /* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov # d_inode->i_private = (xe_device*) + * │ ├── vf1 # d_inode->i_private = VFID(1) + * │ ├── vf2 # d_inode->i_private = VFID(2) + * │ ├── ... + */ + for (n = 1; n <= totalvfs; n++) { + snprintf(vfname, sizeof(vfname), "vf%u", VFID(n)); + vfdent = debugfs_create_dir(vfname, dent); + if (IS_ERR(vfdent)) + return; + vfdent->d_inode->i_private = (void *)(uintptr_t)VFID(n); + } } -- cgit v1.2.3 From 5489e7d44ab3b5f4e48dc789f11521daeef74fbe Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 28 Sep 2025 16:00:25 +0200 Subject: drm/xe/pf: Populate SR-IOV debugfs tree with tiles Populate new per SR-IOV function debugfs directories with next level directories that represent tiles. There are no files yet, but we will continue updating that tree in upcoming patches. 
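Each node in the tree is an ordinary debugfs directory whose inode is tagged so that file callbacks can later recover their context; a minimal sketch of the pattern used here (the names are illustrative, not the exact xe code):

	struct dentry *dent;

	dent = debugfs_create_dir("tile0", parent);	/* debugfs_create_dir() returns ERR_PTR on failure */
	if (IS_ERR(dent))
		return;
	dent->d_inode->i_private = tile;	/* recovered later via d_inode->i_private */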
Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Cc: Rodrigo Vivi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250928140029.198847-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 3 +- drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c | 14 ++++ drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c | 98 +++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h | 15 ++++ 4 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 534355c23e80..00f7ce8e926d 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -175,7 +175,8 @@ xe-$(CONFIG_PCI_IOV) += \ xe_pci_sriov.o \ xe_sriov_pf.o \ xe_sriov_pf_debugfs.o \ - xe_sriov_pf_service.o + xe_sriov_pf_service.o \ + xe_tile_sriov_pf_debugfs.o # include helpers for tests even when XE is built-in ifdef CONFIG_DRM_XE_KUNIT_TEST diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c index 37cc3a297667..2ab0b1f4818a 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c @@ -6,12 +6,14 @@ #include #include +#include "xe_device.h" #include "xe_device_types.h" #include "xe_sriov_pf.h" #include "xe_sriov_pf_debugfs.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_pf_service.h" #include "xe_sriov_printk.h" +#include "xe_tile_sriov_pf_debugfs.h" static int simple_show(struct seq_file *m, void *data) { @@ -37,6 +39,15 @@ static void pf_populate_pf(struct xe_device *xe, struct dentry *pfdent) drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), pfdent, minor); } +static void pf_populate_with_tiles(struct xe_device *xe, struct dentry *dent, unsigned int vfid) +{ + struct xe_tile *tile; + unsigned int id; + + for_each_tile(tile, xe, id) + xe_tile_sriov_pf_debugfs_populate(tile, dent, vfid); +} + /** * xe_sriov_pf_debugfs_register - Register PF debugfs attributes. 
* @xe: the &xe_device @@ -76,6 +87,7 @@ void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) pfdent->d_inode->i_private = xe; pf_populate_pf(xe, pfdent); + pf_populate_with_tiles(xe, pfdent, PFID); /* * /sys/kernel/debug/dri/BDF/ @@ -90,5 +102,7 @@ void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) if (IS_ERR(vfdent)) return; vfdent->d_inode->i_private = (void *)(uintptr_t)VFID(n); + + pf_populate_with_tiles(xe, vfdent, VFID(n)); } } diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c new file mode 100644 index 000000000000..91973ee9bb05 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include +#include + +#include "xe_device_types.h" +#include "xe_tile_sriov_pf_debugfs.h" +#include "xe_sriov.h" + +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov # d_inode->i_private = (xe_device*) + * │ ├── pf # d_inode->i_private = (xe_device*) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ ├── tile1 + * │ │ : : + * │ ├── vf1 # d_inode->i_private = VFID(1) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ ├── tile1 + * │ │ : : + * │ ├── vfN # d_inode->i_private = VFID(N) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ ├── tile1 + * : : : : + */ + +static void *extract_priv(struct dentry *d) +{ + return d->d_inode->i_private; +} + +__maybe_unused +static struct xe_tile *extract_tile(struct dentry *d) +{ + return extract_priv(d); +} + +static struct xe_device *extract_xe(struct dentry *d) +{ + return extract_priv(d->d_parent->d_parent); +} + +__maybe_unused +static unsigned int extract_vfid(struct dentry *d) +{ + void *pp = extract_priv(d->d_parent); + + return pp == extract_xe(d) ? PFID : (uintptr_t)pp; +} + +/** + * xe_tile_sriov_pf_debugfs_populate() - Populate SR-IOV debugfs tree with tile files. + * @tile: the &xe_tile to register + * @parent: the parent &dentry that represents the SR-IOV @vfid function + * @vfid: the VF identifier + * + * Add to the @parent directory new debugfs directory that will represent a @tile and + * populate it with files that are related to the SR-IOV @vfid function. + * + * This function can only be called on PF. 
+ */ +void xe_tile_sriov_pf_debugfs_populate(struct xe_tile *tile, struct dentry *parent, + unsigned int vfid) +{ + struct xe_device *xe = tile->xe; + struct dentry *dent; + char name[10]; /* should be enough up to "tile%u\0" for 2^16 - 1 */ + + xe_tile_assert(tile, IS_SRIOV_PF(xe)); + xe_tile_assert(tile, extract_priv(parent->d_parent) == xe); + xe_tile_assert(tile, extract_priv(parent) == tile->xe || + (uintptr_t)extract_priv(parent) == vfid); + + /* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * │ ├── pf # parent, d_inode->i_private = (xe_device*) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ ├── tile1 + * │ │ : : + * │ ├── vf1 # parent, d_inode->i_private = VFID(1) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ ├── tile1 + * : : : : + */ + snprintf(name, sizeof(name), "tile%u", tile->id); + dent = debugfs_create_dir(name, parent); + if (IS_ERR(dent)) + return; + dent->d_inode->i_private = tile; + + xe_tile_assert(tile, extract_tile(dent) == tile); + xe_tile_assert(tile, extract_vfid(dent) == vfid); + xe_tile_assert(tile, extract_xe(dent) == xe); +} diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h new file mode 100644 index 000000000000..55d179c44634 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TILE_SRIOV_PF_DEBUGFS_H_ +#define _XE_TILE_SRIOV_PF_DEBUGFS_H_ + +struct dentry; +struct xe_tile; + +void xe_tile_sriov_pf_debugfs_populate(struct xe_tile *tile, struct dentry *parent, + unsigned int vfid); + +#endif -- cgit v1.2.3 From 9a719bbf8d60893c0cb21b52e1fb6a8fb07391ea Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 28 Sep 2025 16:00:26 +0200 Subject: drm/xe/pf: Move SR-IOV GT debugfs files to new tree Instead of expanding GT debugfs directories with a large number of SR-IOV files, as those are replicated for each SR-IOV function, move them to our new debugfs tree, organized by function. But to avoid breaking IGT tests that use the current layout, provide symlinks, which can be removed once the transition period is over, or we can leave them for convenience.
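The compatibility entries are plain debugfs symlinks, with the target path given relative to the directory holding the link; roughly (the concrete path is illustrative):

	/* keep the old GT-level name resolving into the new SR-IOV tree */
	debugfs_create_symlink("vf1", gt_dir, "../../sriov/vf1/tile0/gt0");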
Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Cc: Rodrigo Vivi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250928140029.198847-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 355 ++++++++++++++++---------- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h | 1 + drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c | 13 + 3 files changed, 234 insertions(+), 135 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 3ed245e04d0c..fbb3cb3200fb 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -25,12 +25,22 @@ #include "xe_sriov_pf.h" /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 # d_inode->i_private = gt - * │   ├── pf # d_inode->i_private = gt - * │   ├── vf1 # d_inode->i_private = VFID(1) - * :   : - * │   ├── vfN # d_inode->i_private = VFID(N) + * /sys/kernel/debug/dri/BDF/ + * ├── sriov # d_inode->i_private = (xe_device*) + * │ ├── pf # d_inode->i_private = (xe_device*) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ │ ├── gt0 # d_inode->i_private = (xe_gt*) + * │ │ │ ├── gt1 # d_inode->i_private = (xe_gt*) + * │ │ ├── tile1 + * │ │ │ : + * │ ├── vf1 # d_inode->i_private = VFID(1) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ │ ├── gt0 # d_inode->i_private = (xe_gt*) + * │ │ │ ├── gt1 # d_inode->i_private = (xe_gt*) + * │ │ ├── tile1 + * │ │ │ : + * : : + * │ ├── vfN # d_inode->i_private = VFID(N) */ static void *extract_priv(struct dentry *d) @@ -40,26 +50,31 @@ static void *extract_priv(struct dentry *d) static struct xe_gt *extract_gt(struct dentry *d) { - return extract_priv(d->d_parent); + return extract_priv(d); +} + +static struct xe_device *extract_xe(struct dentry *d) +{ + return extract_priv(d->d_parent->d_parent->d_parent); } static unsigned int extract_vfid(struct dentry *d) { - return extract_priv(d) == extract_gt(d) ? PFID : (uintptr_t)extract_priv(d); + void *priv = extract_priv(d->d_parent->d_parent); + + return priv == extract_xe(d) ? PFID : (uintptr_t)priv; } /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │   ├── pf - * │   │   ├── contexts_provisioned - * │   │   ├── doorbells_provisioned - * │   │   ├── runtime_registers - * │   │   ├── negotiated_versions - * │   │   ├── adverse_events - * ├── gt1 - * │   ├── pf - * │   │   ├── ... 
+ * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * : ├── tile0 + * : ├── gt0 + * : ├── contexts_provisioned + * ├── doorbells_provisioned + * ├── runtime_registers + * ├── adverse_events */ static const struct drm_info_list pf_info[] = { @@ -86,11 +101,13 @@ static const struct drm_info_list pf_info[] = { }; /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │   ├── pf - * │   │   ├── ggtt_available - * │   │   ├── ggtt_provisioned + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * : ├── tile0 + * : ├── gt0 + * : ├── ggtt_available + * ├── ggtt_provisioned */ static const struct drm_info_list pf_ggtt_info[] = { @@ -107,10 +124,12 @@ static const struct drm_info_list pf_ggtt_info[] = { }; /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │   ├── pf - * │   │   ├── lmem_provisioned + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * : ├── tile0 + * : ├── gt0 + * : ├── lmem_provisioned */ static const struct drm_info_list pf_lmem_info[] = { @@ -122,12 +141,14 @@ static const struct drm_info_list pf_lmem_info[] = { }; /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │   ├── pf - * │   │   ├── reset_engine - * │   │   ├── sample_period - * │   │   ├── sched_if_idle + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * : ├── tile0 + * : ├── gt0 + * : ├── reset_engine + * ├── sample_period + * ├── sched_if_idle */ #define DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(POLICY, TYPE, FORMAT) \ @@ -173,24 +194,28 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) } /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │   ├── pf - * │   │   ├── ggtt_spare - * │   │   ├── lmem_spare - * │   │   ├── doorbells_spare - * │   │   ├── contexts_spare - * │   │   ├── exec_quantum_ms - * │   │   ├── preempt_timeout_us - * │   │   ├── sched_priority - * │   ├── vf1 - * │   │   ├── ggtt_quota - * │   │   ├── lmem_quota - * │   │   ├── doorbells_quota - * │   │   ├── contexts_quota - * │   │   ├── exec_quantum_ms - * │   │   ├── preempt_timeout_us - * │   │   ├── sched_priority + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * │ ├── tile0 + * │ : ├── gt0 + * │ : ├── ggtt_spare + * │ ├── lmem_spare + * │ ├── doorbells_spare + * │ ├── contexts_spare + * │ ├── exec_quantum_ms + * │ ├── preempt_timeout_us + * │ ├── sched_priority + * ├── vf1 + * : ├── tile0 + * : ├── gt0 + * : ├── ggtt_quota + * ├── lmem_quota + * ├── doorbells_quota + * ├── contexts_quota + * ├── exec_quantum_ms + * ├── preempt_timeout_us + * ├── sched_priority */ #define DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(CONFIG, TYPE, FORMAT) \ @@ -233,22 +258,26 @@ DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(sched_priority, u32, "%llu\n"); /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │   ├── pf - * │   │   ├── threshold_cat_error_count - * │   │   ├── threshold_doorbell_time_us - * │   │   ├── threshold_engine_reset_count - * │   │   ├── threshold_guc_time_us - * │   │   ├── threshold_irq_time_us - * │   │   ├── threshold_page_fault_count - * │   ├── vf1 - * │   │   ├── threshold_cat_error_count - * │   │   ├── threshold_doorbell_time_us - * │   │   ├── threshold_engine_reset_count - * │   │   ├── threshold_guc_time_us - * │   │   ├── threshold_irq_time_us - * │   │   ├── threshold_page_fault_count + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * │ ├── tile0 + * │ : ├── gt0 + * │ : ├── threshold_cat_error_count + * │ ├── threshold_doorbell_time_us + * │ ├── threshold_engine_reset_count + * │ ├── 
threshold_guc_time_us + * │ ├── threshold_irq_time_us + * │ ├── threshold_page_fault_count + * ├── vf1 + * : ├── tile0 + * : ├── gt0 + * : ├── threshold_cat_error_count + * ├── threshold_doorbell_time_us + * ├── threshold_engine_reset_count + * ├── threshold_guc_time_us + * ├── threshold_irq_time_us + * ├── threshold_page_fault_count */ static int set_threshold(void *data, u64 val, enum xe_guc_klv_threshold_index index) @@ -329,10 +358,12 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne } /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │   ├── vf1 - * │   │   ├── control { stop, pause, resume } + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── vf1 + * : ├── tile0 + * : ├── gt0 + * : ├── control { stop, pause, resume } */ static const struct { @@ -409,11 +440,14 @@ static const struct file_operations control_ops = { }; /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │   ├── vf1 - * │   │   ├── guc_state + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── vf1 + * : ├── tile0 + * : ├── gt0 + * : ├── guc_state */ + static ssize_t guc_state_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { @@ -447,11 +481,14 @@ static const struct file_operations guc_state_ops = { }; /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │   ├── vf1 - * │   │   ├── config_blob + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── vf1 + * : ├── tile0 + * : ├── gt0 + * : ├── config_blob */ + static ssize_t config_blob_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { @@ -521,73 +558,121 @@ static const struct file_operations config_blob_ops = { .llseek = default_llseek, }; -/** - * xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs. - * @gt: the &xe_gt to register - * @root: the &dentry that represents the GT directory - * - * Register SR-IOV PF entries that are GT related and must be shown under GT debugfs. 
- */ -void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) +static void pf_populate_gt(struct xe_gt *gt, struct dentry *dent, unsigned int vfid) { struct xe_device *xe = gt_to_xe(gt); struct drm_minor *minor = xe->drm.primary; - int n, totalvfs = xe_sriov_pf_get_totalvfs(xe); - struct dentry *pfdentry; - struct dentry *vfdentry; - char buf[14]; /* should be enough up to "vf%u\0" for 2^32 - 1 */ - - xe_gt_assert(gt, IS_SRIOV_PF(xe)); - xe_gt_assert(gt, root->d_inode->i_private == gt); - - /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │   ├── pf - */ - pfdentry = debugfs_create_dir("pf", root); - if (IS_ERR(pfdentry)) - return; - pfdentry->d_inode->i_private = gt; - drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor); - if (xe_gt_is_main_type(gt)) { - drm_debugfs_create_files(pf_ggtt_info, - ARRAY_SIZE(pf_ggtt_info), - pfdentry, minor); - if (xe_device_has_lmtt(gt_to_xe(gt))) - drm_debugfs_create_files(pf_lmem_info, - ARRAY_SIZE(pf_lmem_info), - pfdentry, minor); - } + if (vfid) { + pf_add_config_attrs(gt, dent, vfid); - pf_add_policy_attrs(gt, pfdentry); - pf_add_config_attrs(gt, pfdentry, PFID); - - for (n = 1; n <= totalvfs; n++) { - /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │   ├── vf1 - * │   ├── vf2 - */ - snprintf(buf, sizeof(buf), "vf%u", n); - vfdentry = debugfs_create_dir(buf, root); - if (IS_ERR(vfdentry)) - break; - vfdentry->d_inode->i_private = (void *)(uintptr_t)n; - - pf_add_config_attrs(gt, vfdentry, VFID(n)); - debugfs_create_file("control", 0600, vfdentry, NULL, &control_ops); + debugfs_create_file("control", 0600, dent, NULL, &control_ops); /* for testing/debugging purposes only! */ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { debugfs_create_file("guc_state", IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400, - vfdentry, NULL, &guc_state_ops); + dent, NULL, &guc_state_ops); debugfs_create_file("config_blob", IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400, - vfdentry, NULL, &config_blob_ops); + dent, NULL, &config_blob_ops); + } + + } else { + pf_add_config_attrs(gt, dent, PFID); + pf_add_policy_attrs(gt, dent); + + drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), dent, minor); + + if (xe_gt_is_main_type(gt)) { + drm_debugfs_create_files(pf_ggtt_info, + ARRAY_SIZE(pf_ggtt_info), + dent, minor); + if (xe_device_has_lmtt(xe)) + drm_debugfs_create_files(pf_lmem_info, + ARRAY_SIZE(pf_lmem_info), + dent, minor); } } } + +/** + * xe_gt_sriov_pf_debugfs_populate() - Create SR-IOV GT-level debugfs directories and files. + * @gt: the &xe_gt to register + * @parent: the parent &dentry that represents a &xe_tile + * @vfid: the VF identifier + * + * Add to the @parent directory new debugfs directory that will represent a @gt and + * populate it with GT files that are related to the SR-IOV @vfid function. + * + * This function can only be called on PF. 
+ */ +void xe_gt_sriov_pf_debugfs_populate(struct xe_gt *gt, struct dentry *parent, unsigned int vfid) +{ + struct dentry *dent; + char name[8]; /* should be enough up to "gt%u\0" for 2^8 - 1 */ + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, extract_priv(parent) == gt->tile); + xe_gt_assert(gt, extract_priv(parent->d_parent) == gt_to_xe(gt) || + (uintptr_t)extract_priv(parent->d_parent) == vfid); + + /* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * │ ├── pf + * │ │ ├── tile0 # parent + * │ │ │ ├── gt0 # d_inode->i_private = (xe_gt*) + * │ │ │ ├── gt1 + * │ │ : : + * │ ├── vf1 + * │ │ ├── tile0 # parent + * │ │ │ ├── gt0 # d_inode->i_private = (xe_gt*) + * │ │ │ ├── gt1 + * │ : : : + */ + snprintf(name, sizeof(name), "gt%u", gt->info.id); + dent = debugfs_create_dir(name, parent); + if (IS_ERR(dent)) + return; + dent->d_inode->i_private = gt; + + xe_gt_assert(gt, extract_gt(dent) == gt); + xe_gt_assert(gt, extract_vfid(dent) == vfid); + + pf_populate_gt(gt, dent, vfid); +} + +static void pf_add_links(struct xe_gt *gt, struct dentry *dent) +{ + unsigned int totalvfs = xe_gt_sriov_pf_get_totalvfs(gt); + unsigned int vfid; + char name[16]; /* should be more than enough for "vf%u\0" and VFID(UINT_MAX) */ + char symlink[64]; /* should be more enough for "../../sriov/vf%u/tile%u/gt%u\0" */ + + for (vfid = 0; vfid <= totalvfs; vfid++) { + if (vfid) + snprintf(name, sizeof(name), "vf%u", vfid); + else + snprintf(name, sizeof(name), "pf"); + snprintf(symlink, sizeof(symlink), "../../sriov/%s/tile%u/gt%u", + name, gt->tile->id, gt->info.id); + debugfs_create_symlink(name, dent, symlink); + } +} + +/** + * xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs. + * @gt: the &xe_gt to register + * @dent: the &dentry that represents the GT directory + * + * Instead of actual files, create symlinks for PF and each VF to their GT specific + * attributes that should be already exposed in the dedicated debugfs SR-IOV tree. + */ +void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *dent) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, dent->d_inode->i_private == gt); + + pf_add_links(gt, dent); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h index 038cc8ddc244..82ff3b7f0532 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h @@ -11,6 +11,7 @@ struct dentry; #ifdef CONFIG_PCI_IOV void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root); +void xe_gt_sriov_pf_debugfs_populate(struct xe_gt *gt, struct dentry *parent, unsigned int vfid); #else static inline void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) { } #endif diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c index 91973ee9bb05..335a79d09639 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c @@ -6,7 +6,9 @@ #include #include +#include "xe_device.h" #include "xe_device_types.h" +#include "xe_gt_sriov_pf_debugfs.h" #include "xe_tile_sriov_pf_debugfs.h" #include "xe_sriov.h" @@ -51,6 +53,15 @@ static unsigned int extract_vfid(struct dentry *d) return pp == extract_xe(d) ? 
PFID : (uintptr_t)pp; } +static void pf_populate_tile(struct xe_tile *tile, struct dentry *dent, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + + for_each_gt_on_tile(gt, tile, id) + xe_gt_sriov_pf_debugfs_populate(gt, dent, vfid); +} + /** * xe_tile_sriov_pf_debugfs_populate() - Populate SR-IOV debugfs tree with tile files. * @tile: the &xe_tile to register @@ -95,4 +106,6 @@ void xe_tile_sriov_pf_debugfs_populate(struct xe_tile *tile, struct dentry *pare xe_tile_assert(tile, extract_tile(dent) == tile); xe_tile_assert(tile, extract_vfid(dent) == vfid); xe_tile_assert(tile, extract_xe(dent) == xe); + + pf_populate_tile(tile, dent, vfid); } -- cgit v1.2.3 From 8cd71c40e989124425fbc97f1495f54db078c6d4 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 28 Sep 2025 16:00:27 +0200 Subject: drm/xe/debugfs: Promote xe_tile_debugfs_simple_show We will want to use this helper function in other files. Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250928140029.198847-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_tile_debugfs.c | 14 +++++++------- drivers/gpu/drm/xe/xe_tile_debugfs.h | 3 +++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.c b/drivers/gpu/drm/xe/xe_tile_debugfs.c index a3f437d38f86..fff242a5ae56 100644 --- a/drivers/gpu/drm/xe/xe_tile_debugfs.c +++ b/drivers/gpu/drm/xe/xe_tile_debugfs.c @@ -17,7 +17,7 @@ static struct xe_tile *node_to_tile(struct drm_info_node *node) } /** - * tile_debugfs_simple_show - A show callback for struct drm_info_list + * xe_tile_debugfs_simple_show() - A show callback for struct drm_info_list * @m: the &seq_file * @data: data used by the drm debugfs helpers * @@ -58,7 +58,7 @@ static struct xe_tile *node_to_tile(struct drm_info_node *node) * * Return: 0 on success or a negative error code on failure. */ -static int tile_debugfs_simple_show(struct seq_file *m, void *data) +int xe_tile_debugfs_simple_show(struct seq_file *m, void *data) { struct drm_printer p = drm_seq_file_printer(m); struct drm_info_node *node = m->private; @@ -69,7 +69,7 @@ static int tile_debugfs_simple_show(struct seq_file *m, void *data) } /** - * tile_debugfs_show_with_rpm - A show callback for struct drm_info_list + * xe_tile_debugfs_show_with_rpm() - A show callback for struct drm_info_list * @m: the &seq_file * @data: data used by the drm debugfs helpers * @@ -77,7 +77,7 @@ static int tile_debugfs_simple_show(struct seq_file *m, void *data) * * Return: 0 on success or a negative error code on failure. 
*/ -static int tile_debugfs_show_with_rpm(struct seq_file *m, void *data) +int xe_tile_debugfs_show_with_rpm(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; struct xe_tile *tile = node_to_tile(node); @@ -85,7 +85,7 @@ static int tile_debugfs_show_with_rpm(struct seq_file *m, void *data) int ret; xe_pm_runtime_get(xe); - ret = tile_debugfs_simple_show(m, data); + ret = xe_tile_debugfs_simple_show(m, data); xe_pm_runtime_put(xe); return ret; @@ -106,8 +106,8 @@ static int sa_info(struct xe_tile *tile, struct drm_printer *p) /* only for debugfs files which can be safely used on the VF */ static const struct drm_info_list vf_safe_debugfs_list[] = { - { "ggtt", .show = tile_debugfs_show_with_rpm, .data = ggtt }, - { "sa_info", .show = tile_debugfs_show_with_rpm, .data = sa_info }, + { "ggtt", .show = xe_tile_debugfs_show_with_rpm, .data = ggtt }, + { "sa_info", .show = xe_tile_debugfs_show_with_rpm, .data = sa_info }, }; /** diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.h b/drivers/gpu/drm/xe/xe_tile_debugfs.h index 0e5f724de37f..4429c22542f4 100644 --- a/drivers/gpu/drm/xe/xe_tile_debugfs.h +++ b/drivers/gpu/drm/xe/xe_tile_debugfs.h @@ -6,8 +6,11 @@ #ifndef _XE_TILE_DEBUGFS_H_ #define _XE_TILE_DEBUGFS_H_ +struct seq_file; struct xe_tile; void xe_tile_debugfs_register(struct xe_tile *tile); +int xe_tile_debugfs_simple_show(struct seq_file *m, void *data); +int xe_tile_debugfs_show_with_rpm(struct seq_file *m, void *data); #endif -- cgit v1.2.3 From 486d7f1bd14fc104c20be13fe8f9632f2e8d08be Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 28 Sep 2025 16:00:28 +0200 Subject: drm/xe/pf: Make GGTT/LMEM debugfs files per-tile Due to the initial design of the Xe debugfs, the GGTT and LMEM files were defined on the primary GT, instead of being per-tile. While the PF provisioning code still maintains GGTT and LMEM at the primary-GT level, this will be refactored soon, but we can fix the debugfs layout now, as part of the new SR-IOV tree. For backward compatibility we will provide some symlinks that can be removed once our tools are fully converted. As we are making all those changes in the user-facing interface, take this as an opportunity to also start replacing the "LMEM" term, used by the SR-IOV code, with the "VRAM" term, used by the Xe driver.
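The per-tile quota/spare files keep the DEFINE_DEBUGFS_ATTRIBUTE() pattern already used at the GT level: a u64 get/set pair plus a printf format, bound to the file with debugfs_create_file_unsafe(). A stripped-down sketch with made-up accessors:

	static int quota_set(void *data, u64 val)
	{
		/* data is the dentry passed as the file's private data */
		return write_quota(data, val);	/* hypothetical accessor */
	}

	static int quota_get(void *data, u64 *val)
	{
		*val = read_quota(data);	/* hypothetical accessor */
		return 0;
	}

	DEFINE_DEBUGFS_ATTRIBUTE(quota_fops, quota_get, quota_set, "%llu\n");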
Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Cc: Rodrigo Vivi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250928140029.198847-7-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 92 +++++------------ drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c | 136 ++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index fbb3cb3200fb..44fec2aae536 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -100,46 +100,6 @@ static const struct drm_info_list pf_info[] = { }, }; -/* - * /sys/kernel/debug/dri/BDF/ - * ├── sriov - * : ├── pf - * : ├── tile0 - * : ├── gt0 - * : ├── ggtt_available - * ├── ggtt_provisioned - */ - -static const struct drm_info_list pf_ggtt_info[] = { - { - "ggtt_available", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_config_print_available_ggtt, - }, - { - "ggtt_provisioned", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_config_print_ggtt, - }, -}; - -/* - * /sys/kernel/debug/dri/BDF/ - * ├── sriov - * : ├── pf - * : ├── tile0 - * : ├── gt0 - * : ├── lmem_provisioned - */ - -static const struct drm_info_list pf_lmem_info[] = { - { - "lmem_provisioned", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_config_print_lmem, - }, -}; - /* * /sys/kernel/debug/dri/BDF/ * ├── sriov @@ -199,9 +159,7 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) * : ├── pf * │ ├── tile0 * │ : ├── gt0 - * │ : ├── ggtt_spare - * │ ├── lmem_spare - * │ ├── doorbells_spare + * │ : ├── doorbells_spare * │ ├── contexts_spare * │ ├── exec_quantum_ms * │ ├── preempt_timeout_us @@ -209,9 +167,7 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) * ├── vf1 * : ├── tile0 * : ├── gt0 - * : ├── ggtt_quota - * ├── lmem_quota - * ├── doorbells_quota + * : ├── doorbells_quota * ├── contexts_quota * ├── exec_quantum_ms * ├── preempt_timeout_us @@ -249,8 +205,6 @@ static int CONFIG##_get(void *data, u64 *val) \ \ DEFINE_DEBUGFS_ATTRIBUTE(CONFIG##_fops, CONFIG##_get, CONFIG##_set, FORMAT) -DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ggtt, u64, "%llu\n"); -DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(lmem, u64, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ctxs, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n"); @@ -331,13 +285,6 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne xe_gt_assert(gt, gt == extract_gt(parent)); xe_gt_assert(gt, vfid == extract_vfid(parent)); - if (xe_gt_is_main_type(gt)) { - debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare", - 0644, parent, parent, &ggtt_fops); - if (xe_device_has_lmtt(gt_to_xe(gt))) - debugfs_create_file_unsafe(vfid ? "lmem_quota" : "lmem_spare", - 0644, parent, parent, &lmem_fops); - } debugfs_create_file_unsafe(vfid ? "doorbells_quota" : "doorbells_spare", 0644, parent, parent, &dbs_fops); debugfs_create_file_unsafe(vfid ? 
"contexts_quota" : "contexts_spare", @@ -558,6 +505,28 @@ static const struct file_operations config_blob_ops = { .llseek = default_llseek, }; +static void pf_add_compat_attrs(struct xe_gt *gt, struct dentry *dent, unsigned int vfid) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (!xe_gt_is_main_type(gt)) + return; + + if (vfid) { + debugfs_create_symlink("ggtt_quota", dent, "../ggtt_quota"); + if (xe_device_has_lmtt(xe)) + debugfs_create_symlink("lmem_quota", dent, "../vram_quota"); + } else { + debugfs_create_symlink("ggtt_spare", dent, "../ggtt_spare"); + debugfs_create_symlink("ggtt_available", dent, "../ggtt_available"); + debugfs_create_symlink("ggtt_provisioned", dent, "../ggtt_provisioned"); + if (xe_device_has_lmtt(xe)) { + debugfs_create_symlink("lmem_spare", dent, "../vram_spare"); + debugfs_create_symlink("lmem_provisioned", dent, "../vram_provisioned"); + } + } +} + static void pf_populate_gt(struct xe_gt *gt, struct dentry *dent, unsigned int vfid) { struct xe_device *xe = gt_to_xe(gt); @@ -583,17 +552,10 @@ static void pf_populate_gt(struct xe_gt *gt, struct dentry *dent, unsigned int v pf_add_policy_attrs(gt, dent); drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), dent, minor); - - if (xe_gt_is_main_type(gt)) { - drm_debugfs_create_files(pf_ggtt_info, - ARRAY_SIZE(pf_ggtt_info), - dent, minor); - if (xe_device_has_lmtt(xe)) - drm_debugfs_create_files(pf_lmem_info, - ARRAY_SIZE(pf_lmem_info), - dent, minor); - } } + + /* for backward compatibility only */ + pf_add_compat_attrs(gt, dent, vfid); } /** diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c index 335a79d09639..c8df18af4d00 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c @@ -8,9 +8,13 @@ #include "xe_device.h" #include "xe_device_types.h" +#include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_debugfs.h" +#include "xe_pm.h" +#include "xe_tile_debugfs.h" #include "xe_tile_sriov_pf_debugfs.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" /* * /sys/kernel/debug/dri/BDF/ @@ -53,11 +57,143 @@ static unsigned int extract_vfid(struct dentry *d) return pp == extract_xe(d) ? 
PFID : (uintptr_t)pp; } +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * : ├── tile0 + * : ├── ggtt_available + * ├── ggtt_provisioned + */ + +static int pf_config_print_available_ggtt(struct xe_tile *tile, struct drm_printer *p) +{ + return xe_gt_sriov_pf_config_print_available_ggtt(tile->primary_gt, p); +} + +static int pf_config_print_ggtt(struct xe_tile *tile, struct drm_printer *p) +{ + return xe_gt_sriov_pf_config_print_ggtt(tile->primary_gt, p); +} + +static const struct drm_info_list pf_ggtt_info[] = { + { + "ggtt_available", + .show = xe_tile_debugfs_simple_show, + .data = pf_config_print_available_ggtt, + }, + { + "ggtt_provisioned", + .show = xe_tile_debugfs_simple_show, + .data = pf_config_print_ggtt, + }, +}; + +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * : ├── tile0 + * : ├── vram_provisioned + */ + +static int pf_config_print_vram(struct xe_tile *tile, struct drm_printer *p) +{ + return xe_gt_sriov_pf_config_print_lmem(tile->primary_gt, p); +} + +static const struct drm_info_list pf_vram_info[] = { + { + "vram_provisioned", + .show = xe_tile_debugfs_simple_show, + .data = pf_config_print_vram, + }, +}; + +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * │ ├── pf + * │ │ ├── tile0 + * │ │ │ ├── ggtt_spare + * │ │ │ ├── vram_spare + * │ │ ├── tile1 + * │ │ : : + * │ ├── vf1 + * │ : ├── tile0 + * │ │ ├── ggtt_quota + * │ │ ├── vram_quota + * │ ├── tile1 + * │ : : + */ + +#define DEFINE_SRIOV_TILE_CONFIG_DEBUGFS_ATTRIBUTE(NAME, CONFIG, TYPE, FORMAT) \ + \ +static int NAME##_set(void *data, u64 val) \ +{ \ + struct xe_tile *tile = extract_tile(data); \ + unsigned int vfid = extract_vfid(data); \ + struct xe_gt *gt = tile->primary_gt; \ + struct xe_device *xe = tile->xe; \ + int err; \ + \ + if (val > (TYPE)~0ull) \ + return -EOVERFLOW; \ + \ + xe_pm_runtime_get(xe); \ + err = xe_sriov_pf_wait_ready(xe) ?: \ + xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ + xe_pm_runtime_put(xe); \ + \ + return err; \ +} \ + \ +static int NAME##_get(void *data, u64 *val) \ +{ \ + struct xe_tile *tile = extract_tile(data); \ + unsigned int vfid = extract_vfid(data); \ + struct xe_gt *gt = tile->primary_gt; \ + \ + *val = xe_gt_sriov_pf_config_get_##CONFIG(gt, vfid); \ + return 0; \ +} \ + \ +DEFINE_DEBUGFS_ATTRIBUTE(NAME##_fops, NAME##_get, NAME##_set, FORMAT) + +DEFINE_SRIOV_TILE_CONFIG_DEBUGFS_ATTRIBUTE(ggtt, ggtt, u64, "%llu\n"); +DEFINE_SRIOV_TILE_CONFIG_DEBUGFS_ATTRIBUTE(vram, lmem, u64, "%llu\n"); + +static void pf_add_config_attrs(struct xe_tile *tile, struct dentry *dent, unsigned int vfid) +{ + xe_tile_assert(tile, tile == extract_tile(dent)); + xe_tile_assert(tile, vfid == extract_vfid(dent)); + + debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare", + 0644, dent, dent, &ggtt_fops); + if (xe_device_has_lmtt(tile->xe)) + debugfs_create_file_unsafe(vfid ? 
"vram_quota" : "vram_spare", + 0644, dent, dent, &vram_fops); +} + static void pf_populate_tile(struct xe_tile *tile, struct dentry *dent, unsigned int vfid) { + struct xe_device *xe = tile->xe; + struct drm_minor *minor = xe->drm.primary; struct xe_gt *gt; unsigned int id; + pf_add_config_attrs(tile, dent, vfid); + + if (!vfid) { + drm_debugfs_create_files(pf_ggtt_info, + ARRAY_SIZE(pf_ggtt_info), + dent, minor); + if (xe_device_has_lmtt(xe)) + drm_debugfs_create_files(pf_vram_info, + ARRAY_SIZE(pf_vram_info), + dent, minor); + } + for_each_gt_on_tile(gt, tile, id) xe_gt_sriov_pf_debugfs_populate(gt, dent, vfid); } -- cgit v1.2.3 From 103094205d7dcc4c0d7504c279404e6b8f18c3f6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Sep 2025 23:16:09 +0200 Subject: drm/xe/debugfs: Update xe_gt_topology_dump signature Our debugfs helper xe_gt_debugfs_show_with_rpm() expects print() functions to return int. New signature allows us to drop wrapper. Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250923211613.193347-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 8 +------- drivers/gpu/drm/xe/xe_gt_topology.c | 11 +++++++++-- drivers/gpu/drm/xe/xe_gt_topology.h | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index b9176d4398e1..6694a38203d3 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -136,12 +136,6 @@ fw_put: return ret; } -static int topology(struct xe_gt *gt, struct drm_printer *p) -{ - xe_gt_topology_dump(gt, p); - return 0; -} - static int steering(struct xe_gt *gt, struct drm_printer *p) { xe_gt_mcr_steering_dump(gt, p); @@ -239,7 +233,7 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p) * - without access to the PF specific data */ static const struct drm_info_list vf_safe_debugfs_list[] = { - { "topology", .show = xe_gt_debugfs_show_with_rpm, .data = topology }, + { "topology", .show = xe_gt_debugfs_show_with_rpm, .data = xe_gt_topology_dump }, { "register-save-restore", .show = xe_gt_debugfs_show_with_rpm, .data = register_save_restore }, { "workarounds", .show = xe_gt_debugfs_show_with_rpm, .data = workarounds }, diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 4e61c5e39bcb..80ef3a6e0a3b 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -269,8 +269,14 @@ static const char *eu_type_to_str(enum xe_gt_eu_type eu_type) return NULL; } -void -xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) +/** + * xe_gt_topology_dump() - Dump GT topology into a drm printer. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Return: always 0. 
+ */ +int xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) { drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS, gt->fuse_topo.g_dss_mask); @@ -285,6 +291,7 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) if (xe_gt_topology_report_l3(gt)) drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, gt->fuse_topo.l3_bank_mask); + return 0; } /* diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index 5e62f5949b7b..3ff40f44bf2a 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -23,7 +23,7 @@ struct drm_printer; void xe_gt_topology_init(struct xe_gt *gt); -void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p); /** * xe_gt_topology_mask_last_dss() - Returns the index of the last DSS in a mask. -- cgit v1.2.3 From d06e0c33f3fcb4a60c9359c606b751ccfeaf5e5c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Sep 2025 23:16:10 +0200 Subject: drm/xe/debugfs: Update xe_wa_dump signature Our debugfs helper xe_gt_debugfs_show_with_rpm() expects print() functions to return int. New signature allows us to drop wrapper. While around, print additional separation lines using puts() to avoid output with leading \n which might confuse some printers. Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250923211613.193347-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 8 +------- drivers/gpu/drm/xe/xe_wa.c | 19 +++++++++++++++---- drivers/gpu/drm/xe/xe_wa.h | 2 +- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 6694a38203d3..e6a7684d571a 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -167,12 +167,6 @@ static int register_save_restore(struct xe_gt *gt, struct drm_printer *p) return 0; } -static int workarounds(struct xe_gt *gt, struct drm_printer *p) -{ - xe_wa_dump(gt, p); - return 0; -} - static int tunings(struct xe_gt *gt, struct drm_printer *p) { xe_tuning_dump(gt, p); @@ -236,7 +230,7 @@ static const struct drm_info_list vf_safe_debugfs_list[] = { { "topology", .show = xe_gt_debugfs_show_with_rpm, .data = xe_gt_topology_dump }, { "register-save-restore", .show = xe_gt_debugfs_show_with_rpm, .data = register_save_restore }, - { "workarounds", .show = xe_gt_debugfs_show_with_rpm, .data = workarounds }, + { "workarounds", .show = xe_gt_debugfs_show_with_rpm, .data = xe_wa_gt_dump }, { "tunings", .show = xe_gt_debugfs_show_with_rpm, .data = tunings }, { "default_lrc_rcs", .show = xe_gt_debugfs_show_with_rpm, .data = rcs_default_lrc }, { "default_lrc_ccs", .show = xe_gt_debugfs_show_with_rpm, .data = ccs_default_lrc }, diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index cd03891654a1..c60159a13001 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -1086,7 +1086,14 @@ void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p) drm_printf_indent(p, 1, "%s\n", device_oob_was[idx].name); } -void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) +/** + * xe_wa_gt_dump() - Dump GT workarounds into a drm printer. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Return: always 0. 
+ */ +int xe_wa_gt_dump(struct xe_gt *gt, struct drm_printer *p) { size_t idx; @@ -1094,18 +1101,22 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) for_each_set_bit(idx, gt->wa_active.gt, ARRAY_SIZE(gt_was)) drm_printf_indent(p, 1, "%s\n", gt_was[idx].name); - drm_printf(p, "\nEngine Workarounds\n"); + drm_puts(p, "\n"); + drm_printf(p, "Engine Workarounds\n"); for_each_set_bit(idx, gt->wa_active.engine, ARRAY_SIZE(engine_was)) drm_printf_indent(p, 1, "%s\n", engine_was[idx].name); - drm_printf(p, "\nLRC Workarounds\n"); + drm_puts(p, "\n"); + drm_printf(p, "LRC Workarounds\n"); for_each_set_bit(idx, gt->wa_active.lrc, ARRAY_SIZE(lrc_was)) drm_printf_indent(p, 1, "%s\n", lrc_was[idx].name); - drm_printf(p, "\nOOB Workarounds\n"); + drm_puts(p, "\n"); + drm_printf(p, "OOB Workarounds\n"); for_each_set_bit(idx, gt->wa_active.oob, ARRAY_SIZE(oob_was)) if (oob_was[idx].name) drm_printf_indent(p, 1, "%s\n", oob_was[idx].name); + return 0; } /* diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index 6a869b2de643..8fd6a5af0910 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -22,7 +22,7 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe); void xe_wa_process_lrc(struct xe_hw_engine *hwe); void xe_wa_apply_tile_workarounds(struct xe_tile *tile); void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p); -void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); +int xe_wa_gt_dump(struct xe_gt *gt, struct drm_printer *p); /** * XE_GT_WA - Out-of-band GT workarounds, to be queried and called as needed. -- cgit v1.2.3 From 8980530abff4408f3cff23f1bddf745eb3f5cc8c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Sep 2025 23:16:11 +0200 Subject: drm/xe/debugfs: Update xe_tuning_dump signature Our debugfs helper xe_gt_debugfs_show_with_rpm() expects print() functions to return int. New signature allows us to drop wrapper. While around, print additional separation lines using puts() to avoid output with leading \n which might confuse some printers. 
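For reference, the dispatch these conversions rely on looks roughly as follows (a sketch assuming the classic drm_info_list plumbing; the actual helper body is not part of this series, and the GT lookup shown here is hypothetical). The .data member of a drm_info_list entry carries the int-returning printer, which is why the per-file void wrappers can be dropped:

static int xe_gt_debugfs_show_with_rpm(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct xe_gt *gt = node_to_gt(node);	/* hypothetical lookup helper */
	int (*print)(struct xe_gt *, struct drm_printer *) = node->info_ent->data;
	struct drm_printer p = drm_seq_file_printer(m);
	int ret;

	xe_pm_runtime_get(gt_to_xe(gt));
	ret = print(gt, &p);	/* the int now propagates to the debugfs read */
	xe_pm_runtime_put(gt_to_xe(gt));

	return ret;
}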
Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250923211613.193347-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 8 +------- drivers/gpu/drm/xe/xe_tuning.c | 17 ++++++++++++++--- drivers/gpu/drm/xe/xe_tuning.h | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index e6a7684d571a..bcf234e74471 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -167,12 +167,6 @@ static int register_save_restore(struct xe_gt *gt, struct drm_printer *p) return 0; } -static int tunings(struct xe_gt *gt, struct drm_printer *p) -{ - xe_tuning_dump(gt, p); - return 0; -} - static int pat(struct xe_gt *gt, struct drm_printer *p) { xe_pat_dump(gt, p); @@ -231,7 +225,7 @@ static const struct drm_info_list vf_safe_debugfs_list[] = { { "register-save-restore", .show = xe_gt_debugfs_show_with_rpm, .data = register_save_restore }, { "workarounds", .show = xe_gt_debugfs_show_with_rpm, .data = xe_wa_gt_dump }, - { "tunings", .show = xe_gt_debugfs_show_with_rpm, .data = tunings }, + { "tunings", .show = xe_gt_debugfs_show_with_rpm, .data = xe_tuning_dump }, { "default_lrc_rcs", .show = xe_gt_debugfs_show_with_rpm, .data = rcs_default_lrc }, { "default_lrc_ccs", .show = xe_gt_debugfs_show_with_rpm, .data = ccs_default_lrc }, { "default_lrc_bcs", .show = xe_gt_debugfs_show_with_rpm, .data = bcs_default_lrc }, diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index a524170a04d0..fd58ea5e78bf 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -214,7 +214,14 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe) xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc); } -void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) +/** + * xe_tuning_dump() - Dump GT tuning info into a drm printer. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Return: always 0. 
+ */ +int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) { size_t idx; @@ -222,11 +229,15 @@ void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings)) drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name); - drm_printf(p, "\nEngine Tunings\n"); + drm_puts(p, "\n"); + drm_printf(p, "Engine Tunings\n"); for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings)) drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name); - drm_printf(p, "\nLRC Tunings\n"); + drm_puts(p, "\n"); + drm_printf(p, "LRC Tunings\n"); for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings)) drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name); + + return 0; } diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h index dd0d3ccc9c65..c1cc5927fda7 100644 --- a/drivers/gpu/drm/xe/xe_tuning.h +++ b/drivers/gpu/drm/xe/xe_tuning.h @@ -14,6 +14,6 @@ int xe_tuning_init(struct xe_gt *gt); void xe_tuning_process_gt(struct xe_gt *gt); void xe_tuning_process_engine(struct xe_hw_engine *hwe); void xe_tuning_process_lrc(struct xe_hw_engine *hwe); -void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p); +int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p); #endif -- cgit v1.2.3 From ab6ccd4f7eb376157b3750d5ea067db6ffc614eb Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Sep 2025 23:16:12 +0200 Subject: drm/xe/debugfs: Update xe_mocs_dump signature Our debugfs helper xe_gt_debugfs_show_with_rpm() expects print() functions to return int. New signature allows us to drop wrapper. While around, move kernel-doc closer to the function definition, as suggested in the doc-guide. Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250923211613.193347-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 8 +------- drivers/gpu/drm/xe/xe_mocs.c | 15 +++++++++++++-- drivers/gpu/drm/xe/xe_mocs.h | 8 +------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index bcf234e74471..b0e6dafeeacc 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -173,12 +173,6 @@ static int pat(struct xe_gt *gt, struct drm_printer *p) return 0; } -static int mocs(struct xe_gt *gt, struct drm_printer *p) -{ - xe_mocs_dump(gt, p); - return 0; -} - static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_RENDER); @@ -237,7 +231,7 @@ static const struct drm_info_list vf_safe_debugfs_list[] = { /* everything else should be added here */ static const struct drm_info_list pf_only_debugfs_list[] = { { "hw_engines", .show = xe_gt_debugfs_show_with_rpm, .data = hw_engines }, - { "mocs", .show = xe_gt_debugfs_show_with_rpm, .data = mocs }, + { "mocs", .show = xe_gt_debugfs_show_with_rpm, .data = xe_mocs_dump }, { "pat", .show = xe_gt_debugfs_show_with_rpm, .data = pat }, { "powergate_info", .show = xe_gt_debugfs_show_with_rpm, .data = xe_gt_idle_pg_print }, { "steering", .show = xe_gt_debugfs_show_with_rpm, .data = steering }, diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 0c737413fcb6..7b68c22ff7bb 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -772,12 +772,20 @@ void xe_mocs_init(struct xe_gt *gt) init_l3cc_table(gt, &table); } -void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) +/** + * 
xe_mocs_dump() - Dump MOCS table. + * @gt: the &xe_gt with MOCS table + * @p: the &drm_printer to dump info to + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); enum xe_force_wake_domains domain; struct xe_mocs_info table; unsigned int fw_ref, flags; + int err = 0; flags = get_mocs_settings(xe, &table); @@ -785,14 +793,17 @@ void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) xe_pm_runtime_get_noresume(xe); fw_ref = xe_force_wake_get(gt_to_fw(gt), domain); - if (!xe_force_wake_ref_has_domain(fw_ref, domain)) + if (!xe_force_wake_ref_has_domain(fw_ref, domain)) { + err = -ETIMEDOUT; goto err_fw; + } table.ops->dump(&table, flags, gt, p); err_fw: xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); + return err; } #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h index dc972ffd4d07..f00bbb269829 100644 --- a/drivers/gpu/drm/xe/xe_mocs.h +++ b/drivers/gpu/drm/xe/xe_mocs.h @@ -11,12 +11,6 @@ struct xe_gt; void xe_mocs_init_early(struct xe_gt *gt); void xe_mocs_init(struct xe_gt *gt); - -/** - * xe_mocs_dump - Dump mocs table - * @gt: GT structure - * @p: Printer to dump info to - */ -void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p); +int xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p); #endif -- cgit v1.2.3 From 65774efef2d53c3ac37694d59ff9ec0d16be3f7f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Sep 2025 23:16:13 +0200 Subject: drm/xe/debugfs: Update xe_pat_dump signature Our debugfs helper xe_gt_debugfs_show_with_rpm() expects print() functions to return int. New signature allows us to drop wrapper. While around, move kernel-doc closer to the function definition, as suggested in the doc-guide. 
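A practical effect of the int return, sketched from the userspace side (standard seq_file semantics: a negative value from the show callback propagates out of read(2); the errno values are the ones added in the diff below, and the path layout is assumed):

	/* dump_fd = open("/sys/kernel/debug/dri/BDF/gt0/pat", O_RDONLY) */
	ssize_t n = read(dump_fd, buf, sizeof(buf));
	if (n < 0 && errno == ETIMEDOUT)
		fprintf(stderr, "forcewake not acquired, dump unavailable\n");
	else if (n < 0 && errno == EOPNOTSUPP)
		fprintf(stderr, "no PAT ops on this platform\n");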
Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250923211613.193347-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 8 +------- drivers/gpu/drm/xe/xe_pat.c | 40 +++++++++++++++++++++++++------------- drivers/gpu/drm/xe/xe_pat.h | 7 +------ 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index b0e6dafeeacc..e4fd632f43cf 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -167,12 +167,6 @@ static int register_save_restore(struct xe_gt *gt, struct drm_printer *p) return 0; } -static int pat(struct xe_gt *gt, struct drm_printer *p) -{ - xe_pat_dump(gt, p); - return 0; -} - static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_RENDER); @@ -232,7 +226,7 @@ static const struct drm_info_list vf_safe_debugfs_list[] = { static const struct drm_info_list pf_only_debugfs_list[] = { { "hw_engines", .show = xe_gt_debugfs_show_with_rpm, .data = hw_engines }, { "mocs", .show = xe_gt_debugfs_show_with_rpm, .data = xe_mocs_dump }, - { "pat", .show = xe_gt_debugfs_show_with_rpm, .data = pat }, + { "pat", .show = xe_gt_debugfs_show_with_rpm, .data = xe_pat_dump }, { "powergate_info", .show = xe_gt_debugfs_show_with_rpm, .data = xe_gt_idle_pg_print }, { "steering", .show = xe_gt_debugfs_show_with_rpm, .data = steering }, }; diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 2e7cb99ae87a..6e48ff84ad0a 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -57,7 +57,7 @@ struct xe_pat_ops { int n_entries); void (*program_media)(struct xe_gt *gt, const struct xe_pat_table_entry table[], int n_entries); - void (*dump)(struct xe_gt *gt, struct drm_printer *p); + int (*dump)(struct xe_gt *gt, struct drm_printer *p); }; static const struct xe_pat_table_entry xelp_pat_table[] = { @@ -194,7 +194,7 @@ static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry ta xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_pta->value); } -static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) +static int xelp_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); unsigned int fw_ref; @@ -202,7 +202,7 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) - return; + return -ETIMEDOUT; drm_printf(p, "PAT table:\n"); @@ -215,6 +215,7 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) } xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } static const struct xe_pat_ops xelp_pat_ops = { @@ -222,7 +223,7 @@ static const struct xe_pat_ops xelp_pat_ops = { .dump = xelp_dump, }; -static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) +static int xehp_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); unsigned int fw_ref; @@ -230,7 +231,7 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) - return; + return -ETIMEDOUT; drm_printf(p, "PAT table:\n"); @@ -245,6 +246,7 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) } xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } static const struct xe_pat_ops xehp_pat_ops = { @@ -252,7 +254,7 @@ static const struct xe_pat_ops xehp_pat_ops = { .dump = xehp_dump, }; -static void 
xehpc_dump(struct xe_gt *gt, struct drm_printer *p) +static int xehpc_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); unsigned int fw_ref; @@ -260,7 +262,7 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) - return; + return -ETIMEDOUT; drm_printf(p, "PAT table:\n"); @@ -273,6 +275,7 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) } xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } static const struct xe_pat_ops xehpc_pat_ops = { @@ -280,7 +283,7 @@ static const struct xe_pat_ops xehpc_pat_ops = { .dump = xehpc_dump, }; -static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) +static int xelpg_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); unsigned int fw_ref; @@ -288,7 +291,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) - return; + return -ETIMEDOUT; drm_printf(p, "PAT table:\n"); @@ -306,6 +309,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) } xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } /* @@ -318,7 +322,7 @@ static const struct xe_pat_ops xelpg_pat_ops = { .dump = xelpg_dump, }; -static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) +static int xe2_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); unsigned int fw_ref; @@ -327,7 +331,7 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) - return; + return -ETIMEDOUT; drm_printf(p, "PAT table:\n"); @@ -367,6 +371,7 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) pat); xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } static const struct xe_pat_ops xe2_pat_ops = { @@ -462,12 +467,19 @@ void xe_pat_init(struct xe_gt *gt) xe->pat.ops->program_graphics(gt, xe->pat.table, xe->pat.n_entries); } -void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p) +/** + * xe_pat_dump() - Dump GT PAT table into a drm printer. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); if (!xe->pat.ops) - return; + return -EOPNOTSUPP; - xe->pat.ops->dump(gt, p); + return xe->pat.ops->dump(gt, p); } diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h index fa0dfbe525cd..268c9a899f56 100644 --- a/drivers/gpu/drm/xe/xe_pat.h +++ b/drivers/gpu/drm/xe/xe_pat.h @@ -43,12 +43,7 @@ void xe_pat_init_early(struct xe_device *xe); */ void xe_pat_init(struct xe_gt *gt); -/** - * xe_pat_dump - Dump PAT table - * @gt: GT structure - * @p: Printer to dump info to - */ -void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p); +int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p); /** * xe_pat_index_get_coh_mode - Extract the coherency mode for the given -- cgit v1.2.3 From 8f1756a7ea33b352a54e6f53d76c552b3a424187 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Mon, 29 Sep 2025 13:26:49 +0200 Subject: drm/xe/bo: Fix an idle assertion for local bos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before calling ttm_bo_populate() in the CPU fault path of a bo, we assert that the bo is not being migrated. 
However, for local bos we share the reservation object with other local bos that might be in the process of being migrated. Also some VM operations may attach USAGE_KERNEL fences to the common reservation object and trigger false positives from the assert. So remove the assert and instead wait for bo idle. This may unnecessarily wait for idle in some cases but since we're doing this wait later in the fault path anyway we might as well do it here as well. This fixes warnings like: Sep 25 14:56:23 desky kernel: ------------[ cut here ]------------ Sep 25 14:56:23 desky kernel: xe 0000:03:00.0: [drm] Assertion `dma_resv_test_signaled(tbo->base.resv, DMA_RESV_USAGE_KERNEL) || (tbo->ttm && ttm_tt_is_populated(tbo->ttm))` failed! platform: BATTLEMAGE subplatform: 1 graphics: Xe2_HPG 20.01 step A0 media: Xe2_HPM 13.01 step A1 Sep 25 14:56:23 desky kernel: WARNING: CPU: 6 PID: 24767 at drivers/gpu/drm/xe/xe_bo.c:1748 xe_bo_fault_migrate+0x1bb/0x300 [xe] Sep 25 14:56:23 desky kernel: Modules linked in: cpuid dm_crypt xt_conntrack nft_chain_nat xt_MASQUERADE nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 bridge stp llc xfrm_user xfr> Sep 25 14:56:23 desky kernel: snd_soc_sdca snd_seq_midi prime_numbers coretemp snd_seq_midi_event drm_ttm_helper snd_hda_codec drm_buddy drm_exec snd_rawmidi snd_soc_core snd_hda_cor> Sep 25 14:56:23 desky kernel: CPU: 6 UID: 1000 PID: 24767 Comm: steamwebhelper Tainted: G U W 6.17.0-rc7+ #32 PREEMPT(voluntary) Sep 25 14:56:23 desky kernel: Tainted: [U]=USER, [W]=WARN Sep 25 14:56:23 desky kernel: Hardware name: Micro-Star International Co., Ltd. MS-7D36/PRO Z690-P DDR4 (MS-7D36), BIOS A.A1 10/18/2022 Sep 25 14:56:23 desky kernel: RIP: 0010:xe_bo_fault_migrate+0x1bb/0x300 [xe] Sep 25 14:56:23 desky kernel: Code: fa 64 29 f9 48 c7 c7 40 e0 d3 c1 51 48 c7 c1 c0 e3 d3 c1 52 4c 8b 45 c0 41 50 44 8b 4d c8 4d 89 e0 48 8b 55 a8 e8 25 27 95 ef <0f> 0b 48 83 c4 40 4> Sep 25 14:56:23 desky kernel: RSP: 0000:ffffae1ca88c7b10 EFLAGS: 00010286 Sep 25 14:56:23 desky kernel: RAX: 0000000000000000 RBX: ffff8d7cfd7e6800 RCX: 0000000000000027 Sep 25 14:56:23 desky kernel: RDX: ffff8d845019cec8 RSI: 0000000000000001 RDI: ffff8d845019cec0 Sep 25 14:56:23 desky kernel: RBP: ffffae1ca88c7bc8 R08: 0000000000000000 R09: 0000000000000000 Sep 25 14:56:23 desky kernel: R10: 0000000000000000 R11: 0000000000000004 R12: ffffffffc1db1faa Sep 25 14:56:23 desky kernel: R13: ffffffffc1db2ab4 R14: 0000000000000001 R15: ffffae1ca88c7bd8 Sep 25 14:56:23 desky kernel: FS: 00007fb1baf31940(0000) GS:ffff8d849c870000(0000) knlGS:0000000000000000 Sep 25 14:56:23 desky kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Sep 25 14:56:23 desky kernel: CR2: 00007fb1b2860020 CR3: 00000001705a9004 CR4: 0000000000772ef0 Sep 25 14:56:23 desky kernel: PKRU: 55555558 Sep 25 14:56:23 desky kernel: Call Trace: Sep 25 14:56:23 desky kernel: Sep 25 14:56:23 desky kernel: xe_bo_cpu_fault_fastpath+0x11e/0x220 [xe] Sep 25 14:56:23 desky kernel: xe_bo_cpu_fault+0x84/0x410 [xe] Sep 25 14:56:23 desky kernel: ? __x64_sys_mmap+0x33/0x50 Sep 25 14:56:23 desky kernel: ? x64_sys_call+0x1b2e/0x20d0 Sep 25 14:56:23 desky kernel: ? do_syscall_64+0x9d/0x1f0 Sep 25 14:56:23 desky kernel: ? __check_object_size+0x4a/0x2e0 Sep 25 14:56:23 desky kernel: __do_fault+0x36/0x190 Sep 25 14:56:23 desky kernel: do_fault+0xcf/0x570 Sep 25 14:56:23 desky kernel: __handle_mm_fault+0x92b/0xfe0 Sep 25 14:56:23 desky kernel: ? 
ktime_get_mono_fast_ns+0x39/0xd0 Sep 25 14:56:23 desky kernel: handle_mm_fault+0x164/0x2c0 Sep 25 14:56:23 desky kernel: do_user_addr_fault+0x2cb/0x840 Sep 25 14:56:23 desky kernel: exc_page_fault+0x75/0x180 Sep 25 14:56:23 desky kernel: asm_exc_page_fault+0x27/0x30 Sep 25 14:56:23 desky kernel: RIP: 0033:0x7fb1bc388bb7 Sep 25 14:56:23 desky kernel: Code: 48 ff c7 48 01 fe 48 8d 54 11 80 0f 1f 84 00 00 00 00 00 c5 fe 6f 0e c5 fe 6f 56 20 c5 fe 6f 5e 40 c5 fe 6f 66 60 48 83 ee 80 fd 7f 0f c5 fd 7> Sep 25 14:56:23 desky kernel: RSP: 002b:00007ffd7814fad8 EFLAGS: 00010207 Sep 25 14:56:23 desky kernel: RAX: 00007fb1b2860000 RBX: 0000000000000690 RCX: 00007fb1b2860000 Sep 25 14:56:23 desky kernel: RDX: 00007fb1b2860610 RSI: 0000556eda79f4c0 RDI: 00007fb1b2860020 Sep 25 14:56:23 desky kernel: RBP: 00007ffd7814fb60 R08: 0000000000000000 R09: 000000012be0e000 Sep 25 14:56:23 desky kernel: R10: 00007fb1b2860000 R11: 0000000000000246 R12: 0000556edd39a240 Sep 25 14:56:23 desky kernel: R13: 00007fb1b2dcb010 R14: 0000556eda79f420 R15: 0000000000000000 Sep 25 14:56:23 desky kernel: Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5250 Fixes: c2ae94cf8cd8 ("drm/xe: Convert the CPU fault handler for exhaustive eviction") Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250929112649.6131-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_bo.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 8422f3cab113..4410e28dee54 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1737,6 +1737,24 @@ static bool should_migrate_to_smem(struct xe_bo *bo) bo->attr.atomic_access == DRM_XE_ATOMIC_CPU; } +static int xe_bo_wait_usage_kernel(struct xe_bo *bo, struct ttm_operation_ctx *ctx) +{ + long lerr; + + if (ctx->no_wait_gpu) + return dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL) ? + 0 : -EBUSY; + + lerr = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + ctx->interruptible, MAX_SCHEDULE_TIMEOUT); + if (lerr < 0) + return lerr; + if (lerr == 0) + return -EBUSY; + + return 0; +} + /* Populate the bo if swapped out, or migrate if the access mode requires that. 
*/ static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx, struct drm_exec *exec) @@ -1745,10 +1763,9 @@ static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx, int err = 0; if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) { - xe_assert(xe_bo_device(bo), - dma_resv_test_signaled(tbo->base.resv, DMA_RESV_USAGE_KERNEL) || - (tbo->ttm && ttm_tt_is_populated(tbo->ttm))); - err = ttm_bo_populate(&bo->ttm, ctx); + err = xe_bo_wait_usage_kernel(bo, ctx); + if (!err) + err = ttm_bo_populate(&bo->ttm, ctx); } else if (should_migrate_to_smem(bo)) { xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM); err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec); @@ -1922,7 +1939,6 @@ static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf) .no_wait_gpu = false, .gfp_retry_mayfail = retry_after_wait, }; - long lerr; err = drm_exec_lock_obj(&exec, &tbo->base); drm_exec_retry_on_contention(&exec); @@ -1942,13 +1958,9 @@ static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf) break; } - lerr = dma_resv_wait_timeout(tbo->base.resv, - DMA_RESV_USAGE_KERNEL, true, - MAX_SCHEDULE_TIMEOUT); - if (lerr < 0) { - err = lerr; + err = xe_bo_wait_usage_kernel(bo, &tctx); + if (err) break; - } if (!retry_after_wait) ret = __xe_bo_cpu_fault(vmf, xe, bo); -- cgit v1.2.3 From 20f3b28e2e07747fd27301f0f5deb3cb569ee15c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 24 Sep 2025 11:22:08 +0100 Subject: drm/xe/xe_late_bind_fw: Fix missing initialization of variable offset The variable offset is not being initialized, and it is only set inside a for-loop if entry->name is the same as manifest_entry. In the case where it is not initialized, a non-zero check on offset is potentially checking a bogus uninitialized value. Fix this by initializing offset to zero. Fixes: efa29317a553 ("drm/xe/xe_late_bind_fw: Extract and print version info") Signed-off-by: Colin Ian King Reviewed-by: Badal Nilawar Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250924102208.9216-1-colin.i.king@gmail.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_late_bind_fw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c index 38f3feb2aecd..8f5082e689dc 100644 --- a/drivers/gpu/drm/xe/xe_late_bind_fw.c +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c @@ -60,7 +60,7 @@ static int parse_cpd_header(struct xe_late_bind_fw *lb_fw, const struct gsc_manifest_header *manifest; const struct gsc_cpd_entry *entry; size_t min_size = sizeof(*header); - u32 offset; + u32 offset = 0; int i; /* manifest_entry is mandatory */ @@ -116,7 +116,7 @@ static int parse_lb_layout(struct xe_late_bind_fw *lb_fw, const struct csc_fpt_header *header = data; const struct csc_fpt_entry *entry; size_t min_size = sizeof(*header); - u32 offset; + u32 offset = 0; int i; /* fpt_entry is mandatory */ -- cgit v1.2.3 From 5a856e277b237e35123f797ca0f7d215e1b83db2 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Thu, 18 Sep 2025 17:18:03 +0530 Subject: drm/xe/hwmon: Drop redundant runtime PM usage The device is expected to be in D0 state during driver probe. No need to resume it in ->is_visible() callbacks.
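The rule behind this cleanup, as a minimal sketch (my_is_visible and my_read are illustrative names, not the driver's): the hwmon core invokes ->is_visible() from hwmon_device_register_with_info() during probe, while the device is still in D0, so only callbacks that can run later need a runtime-PM reference:

static umode_t my_is_visible(const void *drvdata, enum hwmon_sensor_types type,
			     u32 attr, int channel)
{
	/* registration time, probe holds the device in D0: no
	 * xe_pm_runtime_get() needed for register reads here */
	return 0644;
}

static int my_read(struct device *dev, enum hwmon_sensor_types type,
		   u32 attr, int channel, long *val)
{
	struct xe_hwmon *hwmon = dev_get_drvdata(dev);

	xe_pm_runtime_get(hwmon->xe);	/* may run long after probe, in D3 */
	/* ... mmio/pcode access ... */
	xe_pm_runtime_put(hwmon->xe);
	return 0;
}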
Signed-off-by: Raag Jadav Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250918114804.2957177-2-raag.jadav@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hwmon.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index b6790589e623..97879daeefc1 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -658,8 +658,6 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, struct xe_reg rapl_limit; struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - xe_pm_runtime_get(hwmon->xe); - if (hwmon->xe->info.has_mbx_power_limits) { xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &uval); } else if (power_attr != PL2_HWMON_ATTR) { @@ -669,8 +667,6 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, } ret = (uval & PWR_LIM_EN) ? attr->mode : 0; - xe_pm_runtime_put(hwmon->xe); - return ret; } @@ -1096,8 +1092,6 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata; int ret; - xe_pm_runtime_get(hwmon->xe); - switch (type) { case hwmon_temp: ret = xe_hwmon_temp_is_visible(hwmon, attr, channel); @@ -1122,8 +1116,6 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, break; } - xe_pm_runtime_put(hwmon->xe); - return ret; } -- cgit v1.2.3 From d9c401d8f37cd5510cc64647b0421f95e62fe3a2 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Thu, 18 Sep 2025 17:18:04 +0530 Subject: drm/xe/sysfs: Drop redundant runtime PM usage The device is expected to be in D0 state during driver probe. No need to resume it in ->is_visible() callbacks or non I/O operations. Signed-off-by: Raag Jadav Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250918114804.2957177-3-raag.jadav@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device_sysfs.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index c5151c86a98a..ec9c06b06fb5 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -38,13 +38,8 @@ vram_d3cold_threshold_show(struct device *dev, { struct pci_dev *pdev = to_pci_dev(dev); struct xe_device *xe = pdev_to_xe_device(pdev); - int ret; - - xe_pm_runtime_get(xe); - ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold); - xe_pm_runtime_put(xe); - return ret; + return sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold); } static ssize_t @@ -173,11 +168,8 @@ static umode_t late_bind_attr_is_visible(struct kobject *kobj, u32 cap = 0; int ret; - xe_pm_runtime_get(xe); - ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), &cap, NULL); - xe_pm_runtime_put(xe); if (ret) return 0; -- cgit v1.2.3 From ad298d9ec957414dbf3d51f3c8bca4b6d2416c0c Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 30 Sep 2025 14:27:52 +0200 Subject: drm/gpusvm, drm/xe: Fix userptr to not allow device private pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When userptr is used on SVM-enabled VMs, a non-NULL hmm_range::dev_private_owner value might mean that hmm_range_fault() attempts to return device private pages. Either that will fail, or the userptr code will not know how to handle those. Use NULL for hmm_range::dev_private_owner to migrate such pages to system. 
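Concretely, this relies on the hmm_range_fault() ownership contract (a sketch of the semantics, not code from this patch): device-private pages whose owner does not match hmm_range::dev_private_owner are treated as swapped out and are migrated back to system memory by the fault, which is exactly what a NULL owner guarantees for userptr:

	struct hmm_range range = {
		.notifier	   = &notifier,
		.start		   = start,
		.end		   = end,
		.hmm_pfns	   = pfns,
		/* NULL: never return device-private PFNs; fault such
		 * pages back to system memory instead */
		.dev_private_owner = NULL,
	};
	int err = hmm_range_fault(&range);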
In order to do that, move the struct drm_gpusvm::device_private_page_owner field to struct drm_gpusvm_ctx::device_private_page_owner so that it doesn't remain immutable over the drm_gpusvm lifetime. v2: - Don't conditionally compile xe_svm_devm_owner(). - Kerneldoc xe_svm_devm_owner(). Fixes: 9e9787414882 ("drm/xe/userptr: replace xe_hmm with gpusvm") Cc: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Reviewed-by: Matthew Brost Acked-by: Maarten Lankhorst Link: https://lore.kernel.org/r/20250930122752.96034-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/drm_gpusvm.c | 24 +++++++++++++----------- drivers/gpu/drm/xe/xe_svm.c | 11 +++-------- drivers/gpu/drm/xe/xe_svm.h | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_userptr.c | 1 + drivers/gpu/drm/xe/xe_vm.c | 1 + include/drm/drm_gpusvm.h | 7 ++++--- 6 files changed, 36 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index eeeeb99cfdf6..cb906765897e 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -361,7 +361,6 @@ static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { * @name: Name of the GPU SVM. * @drm: Pointer to the DRM device structure. * @mm: Pointer to the mm_struct for the address space. - * @device_private_page_owner: Device private pages owner. * @mm_start: Start address of GPU SVM. * @mm_range: Range of the GPU SVM. * @notifier_size: Size of individual notifiers. @@ -383,7 +382,7 @@ static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { */ int drm_gpusvm_init(struct drm_gpusvm *gpusvm, const char *name, struct drm_device *drm, - struct mm_struct *mm, void *device_private_page_owner, + struct mm_struct *mm, unsigned long mm_start, unsigned long mm_range, unsigned long notifier_size, const struct drm_gpusvm_ops *ops, @@ -395,15 +394,13 @@ int drm_gpusvm_init(struct drm_gpusvm *gpusvm, mmgrab(mm); } else { /* No full SVM mode, only core drm_gpusvm_pages API. */ - if (ops || num_chunks || mm_range || notifier_size || - device_private_page_owner) + if (ops || num_chunks || mm_range || notifier_size) return -EINVAL; } gpusvm->name = name; gpusvm->drm = drm; gpusvm->mm = mm; - gpusvm->device_private_page_owner = device_private_page_owner; gpusvm->mm_start = mm_start; gpusvm->mm_range = mm_range; gpusvm->notifier_size = notifier_size; @@ -684,6 +681,7 @@ static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn, * @notifier: Pointer to the GPU SVM notifier structure * @start: Start address * @end: End address + * @dev_private_owner: The device private page owner * * Check if pages between start and end have been faulted in on the CPU. Use to * prevent migration of pages without CPU backing store. 
@@ -692,14 +690,15 @@ static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn, */ static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm, struct drm_gpusvm_notifier *notifier, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, + void *dev_private_owner) { struct hmm_range hmm_range = { .default_flags = 0, .notifier = ¬ifier->notifier, .start = start, .end = end, - .dev_private_owner = gpusvm->device_private_page_owner, + .dev_private_owner = dev_private_owner, }; unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); @@ -753,6 +752,7 @@ err_free: * @gpuva_start: Start address of GPUVA which mirrors CPU * @gpuva_end: End address of GPUVA which mirrors CPU * @check_pages_threshold: Check CPU pages for present threshold + * @dev_private_owner: The device private page owner * * This function determines the chunk size for the GPU SVM range based on the * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual @@ -767,7 +767,8 @@ drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm, unsigned long fault_addr, unsigned long gpuva_start, unsigned long gpuva_end, - unsigned long check_pages_threshold) + unsigned long check_pages_threshold, + void *dev_private_owner) { unsigned long start, end; int i = 0; @@ -814,7 +815,7 @@ retry: * process-many-malloc' mallocs at least 64k at a time. */ if (end - start <= check_pages_threshold && - !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) { + !drm_gpusvm_check_pages(gpusvm, notifier, start, end, dev_private_owner)) { ++i; goto retry; } @@ -957,7 +958,8 @@ drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm, chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas, fault_addr, gpuva_start, gpuva_end, - ctx->check_pages_threshold); + ctx->check_pages_threshold, + ctx->device_private_page_owner); if (chunk_size == LONG_MAX) { err = -EINVAL; goto err_notifier_remove; @@ -1268,7 +1270,7 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm, .notifier = notifier, .start = pages_start, .end = pages_end, - .dev_private_owner = gpusvm->device_private_page_owner, + .dev_private_owner = ctx->device_private_page_owner, }; void *zdd; unsigned long timeout = diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 7f2f1f041f1d..7e2db71ff34e 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -67,11 +67,6 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) range_debug(range, operation); } -static void *xe_svm_devm_owner(struct xe_device *xe) -{ - return xe; -} - static struct drm_gpusvm_range * xe_svm_range_alloc(struct drm_gpusvm *gpusvm) { @@ -744,15 +739,14 @@ int xe_svm_init(struct xe_vm *vm) xe_svm_garbage_collector_work_func); err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm, - current->mm, xe_svm_devm_owner(vm->xe), 0, - vm->size, + current->mm, 0, vm->size, xe_modparam.svm_notifier_size * SZ_1M, &gpusvm_ops, fault_chunk_sizes, ARRAY_SIZE(fault_chunk_sizes)); drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock); } else { err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", - &vm->xe->drm, NULL, NULL, 0, 0, 0, NULL, + &vm->xe->drm, NULL, 0, 0, 0, NULL, NULL, 0); } @@ -1017,6 +1011,7 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, .devmem_only = need_vram && devmem_possible, .timeslice_ms = need_vram && devmem_possible ? 
vm->xe->atomic_svm_timeslice_ms : 0, + .device_private_page_owner = xe_svm_devm_owner(vm->xe), }; struct xe_validation_ctx vctx; struct drm_exec exec; diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index cef6ee7d6fe3..0955d2ac8d74 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -6,6 +6,20 @@ #ifndef _XE_SVM_H_ #define _XE_SVM_H_ +struct xe_device; + +/** + * xe_svm_devm_owner() - Return the owner of device private memory + * @xe: The xe device. + * + * Return: The owner of this device's device private memory to use in + * hmm_range_fault()- + */ +static inline void *xe_svm_devm_owner(struct xe_device *xe) +{ + return xe; +} + #if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) #include diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c index 91d09af71ced..f16e92cd8090 100644 --- a/drivers/gpu/drm/xe/xe_userptr.c +++ b/drivers/gpu/drm/xe/xe_userptr.c @@ -54,6 +54,7 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) struct xe_device *xe = vm->xe; struct drm_gpusvm_ctx ctx = { .read_only = xe_vma_read_only(vma), + .device_private_page_owner = NULL, }; lockdep_assert_held(&vm->lock); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 80b7f13ecd80..4e914928e0a9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2883,6 +2883,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) ctx.read_only = xe_vma_read_only(vma); ctx.devmem_possible = devmem_possible; ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; + ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe); /* TODO: Threading the migration */ xa_for_each(&op->prefetch_range.range, i, svm_range) { diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index 5434048a2ca4..b92faa9a26b2 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -179,7 +179,6 @@ struct drm_gpusvm_range { * @name: Name of the GPU SVM * @drm: Pointer to the DRM device structure * @mm: Pointer to the mm_struct for the address space - * @device_private_page_owner: Device private pages owner * @mm_start: Start address of GPU SVM * @mm_range: Range of the GPU SVM * @notifier_size: Size of individual notifiers @@ -204,7 +203,6 @@ struct drm_gpusvm { const char *name; struct drm_device *drm; struct mm_struct *mm; - void *device_private_page_owner; unsigned long mm_start; unsigned long mm_range; unsigned long notifier_size; @@ -226,6 +224,8 @@ struct drm_gpusvm { /** * struct drm_gpusvm_ctx - DRM GPU SVM context * + * @device_private_page_owner: The device-private page owner to use for + * this operation * @check_pages_threshold: Check CPU pages for present if chunk is less than or * equal to threshold. If not present, reduce chunk * size. @@ -239,6 +239,7 @@ struct drm_gpusvm { * Context that is DRM GPUSVM is operating in (i.e. user arguments). 
*/ struct drm_gpusvm_ctx { + void *device_private_page_owner; unsigned long check_pages_threshold; unsigned long timeslice_ms; unsigned int in_notifier :1; @@ -249,7 +250,7 @@ struct drm_gpusvm_ctx { int drm_gpusvm_init(struct drm_gpusvm *gpusvm, const char *name, struct drm_device *drm, - struct mm_struct *mm, void *device_private_page_owner, + struct mm_struct *mm, unsigned long mm_start, unsigned long mm_range, unsigned long notifier_size, const struct drm_gpusvm_ops *ops, -- cgit v1.2.3 From 07abc16c14693df703763c45e9fc0abfefc927d5 Mon Sep 17 00:00:00 2001 From: Mallesh Koujalagi Date: Thu, 2 Oct 2025 06:26:48 +0530 Subject: drm/xe/xe_late_bind_fw: Initialize uval variable in xe_late_bind_fw_num_fans() Initialize the uval variable to 0 in xe_late_bind_fw_num_fans() to fix a potential use-of-uninitialized-variable warning and ensure predictable behavior. The variable is passed by reference to xe_pcode_read() which should populate it on success, but initializing it to 0 provides a safe default value and follows kernel coding best practices. v2: - uval = 0 which serves as both a safe default and the fallback value when the pcode read operation fails. v3: - Handle MMIO failure (Rodrigo) - The function should probably return the error and make uval a pointer argument, like pcode_read does. - Change the caller of this function to propagate the error upwards if mmio failed. Fixes: 45832bf9c10f3 ("drm/xe/xe_late_bind_fw: Initialize late binding firmware") Signed-off-by: Mallesh Koujalagi Link: https://lore.kernel.org/r/20251002005648.3185636-1-mallesh.koujalagi@intel.com Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_late_bind_fw.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c index 8f5082e689dc..768442ca7da6 100644 --- a/drivers/gpu/drm/xe/xe_late_bind_fw.c +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c @@ -184,17 +184,13 @@ static const char *xe_late_bind_parse_status(uint32_t status) } } -static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind) +static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind, u32 *num_fans) { struct xe_device *xe = late_bind_to_xe(late_bind); struct xe_tile *root_tile = xe_device_get_root_tile(xe); - u32 uval; - if (!xe_pcode_read(root_tile, - PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), &uval, NULL)) - return uval; - else - return 0; + return xe_pcode_read(root_tile, + PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), num_fans, NULL); } void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind) @@ -314,7 +310,11 @@ static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id) lb_fw->flags &= ~INTEL_LB_FLAG_IS_PERSISTENT; if (lb_fw->type == INTEL_LB_TYPE_FAN_CONTROL) { - num_fans = xe_late_bind_fw_num_fans(late_bind); + ret = xe_late_bind_fw_num_fans(late_bind, &num_fans); + if (ret) { + drm_dbg(&xe->drm, "Failed to read number of fans: %d\n", ret); + return 0; /* Not a fatal error, continue without fan control */ + } drm_dbg(&xe->drm, "Number of Fans: %d\n", num_fans); if (!num_fans) return 0; -- cgit v1.2.3 From e4863f1159befcd70df24fcb5458afaf2feab043 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Thu, 18 Sep 2025 16:02:00 +0530 Subject: drm/xe/i2c: Don't rely on d3cold.allowed flag in system PM path In S3 and above sleep states, the device can lose power regardless of the d3cold.allowed flag.
Bring up I2C controller explicitly in system PM path to ensure its normal operation after losing power. v2: Cover S3 and above states (Rodrigo) Fixes: 0ea07b69517a ("drm/xe/pm: Wire up suspend/resume for I2C controller") Signed-off-by: Raag Jadav Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250918103200.2952576-1-raag.jadav@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 96afa49f0b4b..53507e09f7bc 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -256,7 +256,7 @@ int xe_pm_resume(struct xe_device *xe) if (err) goto err; - xe_i2c_pm_resume(xe, xe->d3cold.allowed); + xe_i2c_pm_resume(xe, true); xe_irq_resume(xe); -- cgit v1.2.3 From b56bc81078e96d5af984b929da5433c2cf776206 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 1 Oct 2025 21:43:20 -0700 Subject: drm/xe/doc: Add documentation for Execution Queues Add documentation for Xe Execution Queues and add xe_exec_queue.rst file. v2: Add info about how Execution queue interfaces with other components in the driver (Matt Brost) Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251002044319.450181-2-niranjana.vishwanathapura@intel.com Signed-off-by: Lucas De Marchi --- Documentation/gpu/xe/index.rst | 1 + Documentation/gpu/xe/xe_exec_queue.rst | 20 ++++++++++++++++++++ drivers/gpu/drm/xe/xe_exec_queue.c | 23 +++++++++++++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 Documentation/gpu/xe/xe_exec_queue.rst diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst index 88b22fad880e..bc432c95d1a3 100644 --- a/Documentation/gpu/xe/index.rst +++ b/Documentation/gpu/xe/index.rst @@ -14,6 +14,7 @@ DG2, etc is provided to prototype the driver. xe_mm xe_map xe_migrate + xe_exec_queue xe_cs xe_pm xe_gt_freq diff --git a/Documentation/gpu/xe/xe_exec_queue.rst b/Documentation/gpu/xe/xe_exec_queue.rst new file mode 100644 index 000000000000..6076569e311c --- /dev/null +++ b/Documentation/gpu/xe/xe_exec_queue.rst @@ -0,0 +1,20 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +=============== +Execution Queue +=============== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_exec_queue.c + :doc: Execution Queue + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_exec_queue_types.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/xe/xe_exec_queue.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/xe/xe_exec_queue.c + :internal: diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 37b2b93b73d6..db3f869b53f3 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -28,6 +28,29 @@ #include "xe_vm.h" #include "xe_pxp.h" +/** + * DOC: Execution Queue + * + * An Execution queue is an interface for the HW context of execution. + * The user creates an execution queue, submits the GPU jobs through those + * queues and in the end destroys them. + * + * Execution queues can also be created by XeKMD itself for driver internal + * operations like object migration etc. + * + * An execution queue is associated with a specified HW engine or a group of + * engines (belonging to the same tile and engine class) and any GPU job + * submitted on the queue will be run on one of these engines. + * + * An execution queue is tied to an address space (VM). 
It holds a reference + * to the associated VM and the underlying Logical Ring Context/s (LRC/s) + * until the queue is destroyed. + * + * The execution queue sits on top of the submission backend. It opaquely + * handles the GuC and Execlist backends whichever the platform uses, and + * the ring operations the different engine classes support. + */ + enum xe_exec_queue_sched_prop { XE_EXEC_QUEUE_JOB_TIMEOUT = 0, XE_EXEC_QUEUE_TIMESLICE = 1, -- cgit v1.2.3 From 846a81abbee3e6cfd238c350658a32199c39313d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 1 Oct 2025 16:40:51 +0200 Subject: drm/xe: Detect GT workqueue allocation failure The allocation of the per-GT workqueue may fail and we shouldn't ignore that. While around use drm managed allocation function to drop our custom fini action. Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251001144051.202040-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3e0ad7e5b5df..b77572a19548 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -65,29 +65,19 @@ #include "xe_wa.h" #include "xe_wopcm.h" -static void gt_fini(struct drm_device *drm, void *arg) -{ - struct xe_gt *gt = arg; - - destroy_workqueue(gt->ordered_wq); -} - struct xe_gt *xe_gt_alloc(struct xe_tile *tile) { + struct drm_device *drm = &tile_to_xe(tile)->drm; struct xe_gt *gt; - int err; - gt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*gt), GFP_KERNEL); + gt = drmm_kzalloc(drm, sizeof(*gt), GFP_KERNEL); if (!gt) return ERR_PTR(-ENOMEM); gt->tile = tile; - gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", - WQ_MEM_RECLAIM); - - err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt); - if (err) - return ERR_PTR(err); + gt->ordered_wq = drmm_alloc_ordered_workqueue(drm, "gt-ordered-wq", WQ_MEM_RECLAIM); + if (IS_ERR(gt->ordered_wq)) + return ERR_CAST(gt->ordered_wq); return gt; } -- cgit v1.2.3 From c97cdf7686138a60ba09bde360eb9f459953c1e2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 1 Oct 2025 01:35:19 +0200 Subject: drm/xe/pf: Add top level functions to control VFs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have control functions that we use to control the VF state on a per-GT basis, but that is a low-level detail from the user point of view, who rather expects VF-level functions. For now add simple functions that just iterate over all GTs and call the per-GT control function. We will soon allow some of them to be used from user-facing interfaces like debugfs.
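A typical caller then wraps the device-level operation, rather than each per-GT step, in a runtime-PM reference (a sketch; this mirrors what the debugfs interface added later in this series does):

	xe_pm_runtime_get(xe);
	err = xe_sriov_pf_control_pause_vf(xe, vfid);
	xe_pm_runtime_put(xe);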
Signed-off-by: Michal Wajdeczko Reviewed-by: Michał Winiarski Link: https://lore.kernel.org/r/20250930233525.201263-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_pci_sriov.c | 8 +-- drivers/gpu/drm/xe/xe_sriov_pf_control.c | 104 +++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf_control.h | 16 +++++ 4 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf_control.c create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf_control.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 00f7ce8e926d..f4b7b2bcaa59 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -174,6 +174,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt_ml.o \ xe_pci_sriov.o \ xe_sriov_pf.o \ + xe_sriov_pf_control.o \ xe_sriov_pf_debugfs.o \ xe_sriov_pf_service.o \ xe_tile_sriov_pf_debugfs.o diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index af05db07162e..9c1c9e669b04 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -17,6 +17,7 @@ #include "xe_pm.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" +#include "xe_sriov_pf_control.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_printk.h" @@ -60,13 +61,10 @@ static void pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) { - struct xe_gt *gt; - unsigned int id; unsigned int n; - for_each_gt(gt, xe, id) - for (n = 1; n <= num_vfs; n++) - xe_gt_sriov_pf_control_trigger_flr(gt, n); + for (n = 1; n <= num_vfs; n++) + xe_sriov_pf_control_reset_vf(xe, n); } static struct pci_dev *xe_pci_pf_get_vf_dev(struct xe_device *xe, unsigned int vf_id) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_sriov_pf_control.c new file mode 100644 index 000000000000..56c0dd382262 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_device.h" +#include "xe_gt_sriov_pf_control.h" +#include "xe_sriov_pf_control.h" + +/** + * xe_sriov_pf_control_pause_vf() - Pause a VF on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier (can't be 0 == PFID) + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_pause_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_control_pause_vf(gt, vfid); + result = result ? -EUCLEAN : err; + } + + return result; +} + +/** + * xe_sriov_pf_control_resume_vf() - Resume a VF on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_control_resume_vf(gt, vfid); + result = result ? -EUCLEAN : err; + } + + return result; +} + +/** + * xe_sriov_pf_control_stop_vf - Stop a VF on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_control_stop_vf(gt, vfid); + result = result ? -EUCLEAN : err; + } + + return result; +} + +/** + * xe_sriov_pf_control_reset_vf() - Perform a VF reset (FLR). + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_control_trigger_flr(gt, vfid); + result = result ? -EUCLEAN : err; + } + + return result; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_sriov_pf_control.h new file mode 100644 index 000000000000..9bf059f746d4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_CONTROL_H_ +#define _XE_SRIOV_PF_CONTROL_H_ + +struct xe_device; + +int xe_sriov_pf_control_pause_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid); + +#endif -- cgit v1.2.3 From ac43294e8ec2227eb5fa3c0576f9ec07742eabb2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 1 Oct 2025 01:35:20 +0200 Subject: drm/xe/pf: Log only top level VF state changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The user likely only cares about top-level VF state changes; any VF state logs on a per-GT basis can be demoted to the debug level.
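The resulting split, illustrated with the helpers from the diff below: per-GT progress becomes debug-only, while the device-level summary stays at info level:

	/* per GT, visible only with debug logging enabled */
	xe_gt_sriov_dbg(gt, "VF%u paused!\n", vfid);

	/* once per device, printed at info level */
	xe_sriov_info(xe, "VF%u paused!\n", vfid);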
Signed-off-by: Michal Wajdeczko Reviewed-by: Michał Winiarski Link: https://lore.kernel.org/r/20250930233525.201263-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 6 +++--- drivers/gpu/drm/xe/xe_sriov_pf_control.c | 19 ++++++++++++++++--- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 4f7fff892bc0..e582f4d60015 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -616,7 +616,7 @@ int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid) } if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { - xe_gt_sriov_info(gt, "VF%u paused!\n", vfid); + xe_gt_sriov_dbg(gt, "VF%u paused!\n", vfid); return 0; } @@ -755,7 +755,7 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid) return err; if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) { - xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid); + xe_gt_sriov_dbg(gt, "VF%u resumed!\n", vfid); return 0; } @@ -896,7 +896,7 @@ int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid) return err; if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { - xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid); + xe_gt_sriov_dbg(gt, "VF%u stopped!\n", vfid); return 0; } diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_sriov_pf_control.c index 56c0dd382262..fdadd4fbe985 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.c @@ -6,6 +6,7 @@ #include "xe_device.h" #include "xe_gt_sriov_pf_control.h" #include "xe_sriov_pf_control.h" +#include "xe_sriov_printk.h" /** * xe_sriov_pf_control_pause_vf() - Pause a VF on all GTs. @@ -28,7 +29,11 @@ int xe_sriov_pf_control_pause_vf(struct xe_device *xe, unsigned int vfid) result = result ? -EUCLEAN : err; } - return result; + if (result) + return result; + + xe_sriov_info(xe, "VF%u paused!\n", vfid); + return 0; } /** @@ -52,7 +57,11 @@ int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid) result = result ? -EUCLEAN : err; } - return result; + if (result) + return result; + + xe_sriov_info(xe, "VF%u resumed!\n", vfid); + return 0; } /** @@ -76,7 +85,11 @@ int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid) result = result ? 
-EUCLEAN : err; } - return result; + if (result) + return result; + + xe_sriov_info(xe, "VF%u stopped!\n", vfid); + return 0; } /** -- cgit v1.2.3 From 5b7451fdd703dc582aac102bc4052b9bea501dfe Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 1 Oct 2025 01:35:21 +0200 Subject: drm/xe/pf: Expose VF control operations over debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To allow the user to control the activity of individual VFs, expose basic VF control operations (pause, resume, stop, reset) over the debugfs as write-only files: /sys/kernel/debug/dri/BDF/sriov/ ├── vf1 │ ├── pause │ ├── reset │ ├── resume │ ├── stop │ : ├── vf2 : : Signed-off-by: Michal Wajdeczko Reviewed-by: Michał Winiarski Link: https://lore.kernel.org/r/20250930233525.201263-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c | 93 ++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c index 2ab0b1f4818a..97636ed86fb8 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c @@ -8,13 +8,41 @@ #include "xe_device.h" #include "xe_device_types.h" +#include "xe_pm.h" #include "xe_sriov_pf.h" +#include "xe_sriov_pf_control.h" #include "xe_sriov_pf_debugfs.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_pf_service.h" #include "xe_sriov_printk.h" #include "xe_tile_sriov_pf_debugfs.h" +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov # d_inode->i_private = (xe_device*) + * │ ├── pf # d_inode->i_private = (xe_device*) + * │ ├── vf1 # d_inode->i_private = VFID(1) + * : : + * │ ├── vfN # d_inode->i_private = VFID(N) + */ + +static void *extract_priv(struct dentry *d) +{ + return d->d_inode->i_private; +} + +static struct xe_device *extract_xe(struct dentry *d) +{ + return extract_priv(d->d_parent); +} + +static unsigned int extract_vfid(struct dentry *d) +{ + void *p = extract_priv(d); + + return p == extract_xe(d) ? PFID : (uintptr_t)p; +} + static int simple_show(struct seq_file *m, void *data) { struct drm_printer p = drm_seq_file_printer(m); @@ -39,6 +67,70 @@ static void pf_populate_pf(struct xe_device *xe, struct dentry *pfdent) drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), pfdent, minor); } +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * │ ├── vf1 + * │ │ ├── pause + * │ │ ├── reset + * │ │ ├── resume + * │ │ ├── stop + * │ │ : + * │ ├── vf2 + * │ │ ├── ... 
+ */ + +static ssize_t from_file_write_to_vf_call(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos, + int (*call)(struct xe_device *, unsigned int)) +{ + struct dentry *dent = file_dentry(file)->d_parent; + struct xe_device *xe = extract_xe(dent); + unsigned int vfid = extract_vfid(dent); + bool yes; + int ret; + + if (*ppos) + return -EINVAL; + ret = kstrtobool_from_user(userbuf, count, &yes); + if (ret < 0) + return ret; + if (yes) { + xe_pm_runtime_get(xe); + ret = call(xe, vfid); + xe_pm_runtime_put(xe); + } + if (ret < 0) + return ret; + return count; +} + +#define DEFINE_VF_CONTROL_ATTRIBUTE(OP) \ +static int OP##_show(struct seq_file *s, void *unused) \ +{ \ + return 0; \ +} \ +static ssize_t OP##_write(struct file *file, const char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + return from_file_write_to_vf_call(file, userbuf, count, ppos, \ + xe_sriov_pf_control_##OP); \ +} \ +DEFINE_SHOW_STORE_ATTRIBUTE(OP) + +DEFINE_VF_CONTROL_ATTRIBUTE(pause_vf); +DEFINE_VF_CONTROL_ATTRIBUTE(resume_vf); +DEFINE_VF_CONTROL_ATTRIBUTE(stop_vf); +DEFINE_VF_CONTROL_ATTRIBUTE(reset_vf); + +static void pf_populate_vf(struct xe_device *xe, struct dentry *vfdent) +{ + debugfs_create_file("pause", 0200, vfdent, xe, &pause_vf_fops); + debugfs_create_file("resume", 0200, vfdent, xe, &resume_vf_fops); + debugfs_create_file("stop", 0200, vfdent, xe, &stop_vf_fops); + debugfs_create_file("reset", 0200, vfdent, xe, &reset_vf_fops); +} + static void pf_populate_with_tiles(struct xe_device *xe, struct dentry *dent, unsigned int vfid) { struct xe_tile *tile; @@ -103,6 +195,7 @@ void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) return; vfdent->d_inode->i_private = (void *)(uintptr_t)VFID(n); + pf_populate_vf(xe, vfdent); pf_populate_with_tiles(xe, vfdent, VFID(n)); } } -- cgit v1.2.3 From 1f018c8496a15ef116e2533af72e75834d1cdd34 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 1 Oct 2025 01:35:22 +0200 Subject: drm/xe/pf: Unify VF state tracking log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By using a single function that dumps VF state transitions, the final logs are easier to analyze, as there is always the same call site in every debug message.
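With the pf_track_vf_state() helper added by this patch, every enter/exit transition is printed from a single call site in one fixed format; a resulting debug line looks roughly like this (the numeric bit value is illustrative):

	VF1 state FLR_WIP(5) enter

so log filtering can key either on the one call site or on the constant "VF%u state" prefix.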
Signed-off-by: Michal Wajdeczko Reviewed-by: Michał Winiarski Link: https://lore.kernel.org/r/20250930233525.201263-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index e582f4d60015..90fb69b0e041 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -271,12 +271,19 @@ static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid, return result; } +static void pf_track_vf_state(struct xe_gt *gt, unsigned int vfid, + enum xe_gt_sriov_control_bits bit, + const char *what) +{ + xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) %s\n", + vfid, control_bit_to_string(bit), bit, what); +} + static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid, enum xe_gt_sriov_control_bits bit) { if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) { - xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n", - vfid, control_bit_to_string(bit), bit); + pf_track_vf_state(gt, vfid, bit, "enter"); return true; } return false; @@ -286,8 +293,7 @@ static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid, enum xe_gt_sriov_control_bits bit) { if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) { - xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n", - vfid, control_bit_to_string(bit), bit); + pf_track_vf_state(gt, vfid, bit, "exit"); return true; } return false; } -- cgit v1.2.3 From 03dc00c78221f221bec9d8eef1534ce8675bed9d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 1 Oct 2025 01:35:23 +0200 Subject: drm/xe/pf: Split VF FLR processing function On multi-GT platforms (like PTL) we may want to run VF FLR on each GuC (render and media) in parallel. Split our FLR function to allow waiting for GT VF FLR completion separately. Signed-off-by: Michal Wajdeczko Reviewed-by: Michał Winiarski Link: https://lore.kernel.org/r/20250930233525.201263-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 22 +++++++++++++++++++++- drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h | 1 + drivers/gpu/drm/xe/xe_sriov_pf_control.c | 5 +++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 90fb69b0e041..491918d6b93b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -1172,11 +1172,31 @@ static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid) * Return: 0 on success or a negative error code on failure. */ int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid) +{ + pf_enter_vf_flr_wip(gt, vfid); + + return 0; +} + +/** + * xe_gt_sriov_pf_control_wait_flr() - Wait for a VF FLR to complete. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure.
+ */ +int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid) { unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP); int err; - pf_enter_vf_flr_wip(gt, vfid); + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED)) + return -EIO; + + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) + return 0; err = pf_wait_vf_wip_done(gt, vfid, timeout); if (err) { diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h index c85e64f099cc..fd256866f628 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h @@ -18,6 +18,7 @@ int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid); #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_sriov_pf_control.c index fdadd4fbe985..e1c54a8feb07 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.c @@ -113,5 +113,10 @@ int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid) result = result ? -EUCLEAN : err; } + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_control_wait_flr(gt, vfid); + result = result ? -EUCLEAN : err; + } + return result; } -- cgit v1.2.3 From 2a8fcf7cc950e6406541a3a2464192c6646392cb Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 1 Oct 2025 01:35:24 +0200 Subject: drm/xe/pf: Synchronize VF FLR between all GTs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PF part of the VF FLR processing shall be done only after all GuCs confirm that they have finished their part of the VF FLR processing; otherwise the PF may start clearing the VF's GGTT while another GuC is still accessing it.
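The checkpoint implementing this behaves like a two-phase barrier across all GTs; schematically (a condensed sketch of the device-level helper added later in this patch):

	/* phase 1: fail unless every GT has reached FLR_SYNC (or has no FLR) */
	for_each_gt(gt, xe, id) {
		ret = xe_gt_sriov_pf_control_sync_flr(gt, vfid, false);
		if (ret < 0)
			return ret;	/* e.g. -EBUSY while a GT is still in FLR_WIP */
	}

	/* phase 2: only now release each GT from the checkpoint */
	for_each_gt(gt, xe, id) {
		ret = xe_gt_sriov_pf_control_sync_flr(gt, vfid, true);
		if (ret < 0)
			return ret;
	}

so no GT proceeds to clearing shared resources such as the GGTT until all GuCs have confirmed their part of the FLR.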
Signed-off-by: Michal Wajdeczko Reviewed-by: Michał Winiarski Link: https://lore.kernel.org/r/20250930233525.201263-7-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 56 ++++++++++++++++++++++- drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h | 2 + drivers/gpu/drm/xe/xe_sriov_pf_control.c | 29 ++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf_control.h | 1 + 5 files changed, 88 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 491918d6b93b..2e6bd3d1fe1d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -18,6 +18,7 @@ #include "xe_gt_sriov_printk.h" #include "xe_guc_ct.h" #include "xe_sriov.h" +#include "xe_sriov_pf_control.h" #include "xe_sriov_pf_service.h" #include "xe_tile.h" @@ -170,6 +171,7 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit) CASE2STR(FLR_SEND_START); CASE2STR(FLR_WAIT_GUC); CASE2STR(FLR_GUC_DONE); + CASE2STR(FLR_SYNC); CASE2STR(FLR_RESET_CONFIG); CASE2STR(FLR_RESET_DATA); CASE2STR(FLR_RESET_MMIO); @@ -940,6 +942,10 @@ int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid) * : v : | | * : FLR_GUC_DONE : | | * : | : | | + * : | o--<--sync : | | + * : |/ / : | | + * : FLR_SYNC--o : | | + * : | : | | * : FLR_RESET_CONFIG---failed--->-----------o--------+-----------o * : | : | | * : FLR_RESET_DATA : | | @@ -1147,12 +1153,38 @@ static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid) return true; } +static bool pf_exit_vf_flr_sync(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC)) + return false; + + pf_enter_vf_flr_reset_config(gt, vfid); + return true; +} + +static void pf_enter_vf_flr_sync(struct xe_gt *gt, unsigned int vfid) +{ + int ret; + + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC)) + pf_enter_vf_state_machine_bug(gt, vfid); + + ret = xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid); + if (ret < 0) { + xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint %pe\n", ERR_PTR(ret)); + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC); + } else { + xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint pass\n"); + pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC); + } +} + static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid) { if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE)) return false; - pf_enter_vf_flr_reset_config(gt, vfid); + pf_enter_vf_flr_sync(gt, vfid); return true; } @@ -1178,6 +1210,28 @@ int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid) return 0; } +/** + * xe_gt_sriov_pf_control_sync_flr() - Synchronize on the VF FLR checkpoint. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @sync: if true, allow exiting the checkpoint + * + * Return: non-zero if the FLR checkpoint has been reached, zero if there is no FLR + * in progress, or a negative error code if the FLR is busy or has failed. + */ +int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync) +{ + if (sync && pf_exit_vf_flr_sync(gt, vfid)) + return 1; + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC)) + return 1; + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) + return -EBUSY; + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED)) + return -EIO; + return 0; +} + /** * xe_gt_sriov_pf_control_wait_flr() - Wait for a VF FLR to complete.
* @gt: the &xe_gt diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h index fd256866f628..8a72ef3778d4 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h @@ -18,6 +18,7 @@ int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync); int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid); #ifdef CONFIG_PCI_IOV diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h index f02f941b4ad2..c80b7e77f1ad 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h @@ -18,6 +18,7 @@ * @XE_GT_SRIOV_STATE_FLR_SEND_START: indicates that the PF wants to send a FLR START command. * @XE_GT_SRIOV_STATE_FLR_WAIT_GUC: indicates that the PF awaits for a response from the GuC. * @XE_GT_SRIOV_STATE_FLR_GUC_DONE: indicates that the PF has received a response from the GuC. + * @XE_GT_SRIOV_STATE_FLR_SYNC: indicates that the PF awaits to synchronize with other GuCs. * @XE_GT_SRIOV_STATE_FLR_RESET_CONFIG: indicates that the PF needs to clear VF's resources. * @XE_GT_SRIOV_STATE_FLR_RESET_DATA: indicates that the PF needs to clear VF's data. * @XE_GT_SRIOV_STATE_FLR_RESET_MMIO: indicates that the PF needs to reset VF's registers. @@ -47,6 +48,7 @@ enum xe_gt_sriov_control_bits { XE_GT_SRIOV_STATE_FLR_SEND_START, XE_GT_SRIOV_STATE_FLR_WAIT_GUC, XE_GT_SRIOV_STATE_FLR_GUC_DONE, + XE_GT_SRIOV_STATE_FLR_SYNC, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG, XE_GT_SRIOV_STATE_FLR_RESET_DATA, XE_GT_SRIOV_STATE_FLR_RESET_MMIO, diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_sriov_pf_control.c index e1c54a8feb07..416d00a03fbb 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.c @@ -120,3 +120,32 @@ int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid) return result; } + +/** + * xe_sriov_pf_control_sync_flr() - Synchronize a VF FLR between all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int ret; + + for_each_gt(gt, xe, id) { + ret = xe_gt_sriov_pf_control_sync_flr(gt, vfid, false); + if (ret < 0) + return ret; + } + for_each_gt(gt, xe, id) { + ret = xe_gt_sriov_pf_control_sync_flr(gt, vfid, true); + if (ret < 0) + return ret; + } + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_sriov_pf_control.h index 9bf059f746d4..2d52d0ac1b28 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_control.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.h @@ -12,5 +12,6 @@ int xe_sriov_pf_control_pause_vf(struct xe_device *xe, unsigned int vfid); int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid); int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid); int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid); #endif -- cgit v1.2.3 From 0892507f4a0b76eb897afc2bacca85e172512379 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Oct 2025 12:29:09 +0300 Subject: mfd: ls2kbmc: Fix an IS_ERR() vs NULL check in probe() The devm_kzalloc() function returns NULL on error so check for that instead of error pointers. Fixes: d952bba3fbb5 ("mfd: ls2kbmc: Add Loongson-2K BMC reset function support") Signed-off-by: Dan Carpenter Message-ID: Signed-off-by: Corey Minyard --- drivers/mfd/ls2k-bmc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/ls2k-bmc-core.c b/drivers/mfd/ls2k-bmc-core.c index e162b3c7c9f8..5f38514fa89e 100644 --- a/drivers/mfd/ls2k-bmc-core.c +++ b/drivers/mfd/ls2k-bmc-core.c @@ -469,7 +469,7 @@ static int ls2k_bmc_probe(struct pci_dev *dev, const struct pci_device_id *id) return ret; ddata = devm_kzalloc(&dev->dev, sizeof(*ddata), GFP_KERNEL); - if (IS_ERR(ddata)) { + if (!ddata) { ret = -ENOMEM; goto disable_pci; } -- cgit v1.2.3 From 4af66c2bcab06e6e515b23139122e745d7619680 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Oct 2025 12:29:18 +0300 Subject: mfd: ls2kbmc: check for devm_mfd_add_devices() failure Call pci_disable_device() if devm_mfd_add_devices() fails. Fixes: 0d64f6d1ffe9 ("mfd: ls2kbmc: Introduce Loongson-2K BMC core driver") Signed-off-by: Dan Carpenter Message-ID: Signed-off-by: Corey Minyard --- drivers/mfd/ls2k-bmc-core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/mfd/ls2k-bmc-core.c b/drivers/mfd/ls2k-bmc-core.c index 5f38514fa89e..69387dad6661 100644 --- a/drivers/mfd/ls2k-bmc-core.c +++ b/drivers/mfd/ls2k-bmc-core.c @@ -495,9 +495,13 @@ static int ls2k_bmc_probe(struct pci_dev *dev, const struct pci_device_id *id) goto disable_pci; } - return devm_mfd_add_devices(&dev->dev, PLATFORM_DEVID_AUTO, - ls2k_bmc_cells, ARRAY_SIZE(ls2k_bmc_cells), - &dev->resource[0], 0, NULL); + ret = devm_mfd_add_devices(&dev->dev, PLATFORM_DEVID_AUTO, + ls2k_bmc_cells, ARRAY_SIZE(ls2k_bmc_cells), + &dev->resource[0], 0, NULL); + if (ret) + goto disable_pci; + + return 0; disable_pci: pci_disable_device(dev); -- cgit v1.2.3 From 08c98f3f2bd7bf5dddd98b17c7caf94d07fad107 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 3 Oct 2025 01:38:22 +0200 Subject: Revert "drm/xe/vf: Rebase exec queue parallel commands during migration recovery" This reverts commit ba180a362128cb71d16c3f0ce6645448011d2607. Due to change in the VF migration recovery design this code is not needed any more. 
v3: - Add commit message (Michal / Lucas) Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Signed-off-by: Michal Wajdeczko Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20251002233824.203417-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_abi.h | 8 ----- drivers/gpu/drm/xe/xe_guc_submit.c | 54 -------------------------------- 2 files changed, 62 deletions(-) diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index 31090c69dfbe..47756e4674a1 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -196,14 +196,6 @@ enum xe_guc_register_context_multi_lrc_param_offsets { XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN = 11, }; -enum xe_guc_context_wq_item_offsets { - XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN = 0, - XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW, - XE_GUC_CONTEXT_WQ_EL_INFO_DATA_2_GUCCTX_RINGTAIL_FREEZEPOCS, - XE_GUC_CONTEXT_WQ_EL_INFO_DATA_3_WI_FENCE_ID, - XE_GUC_CONTEXT_WQ_EL_CHILD_LIST_DATA_4_RINGTAIL, -}; - enum xe_guc_report_status { XE_GUC_REPORT_STATUS_UNKNOWN = 0x0, XE_GUC_REPORT_STATUS_ACKED = 0x1, diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 53024eb5670b..3ac0950f55be 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -735,18 +735,12 @@ static void wq_item_append(struct xe_exec_queue *q) if (wq_wait_for_space(q, wqi_size)) return; - xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN); wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | FIELD_PREP(WQ_LEN_MASK, len_dw); - xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW); wqi[i++] = xe_lrc_descriptor(q->lrc[0]); - xe_gt_assert(guc_to_gt(guc), i == - XE_GUC_CONTEXT_WQ_EL_INFO_DATA_2_GUCCTX_RINGTAIL_FREEZEPOCS); wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64)); - xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_3_WI_FENCE_ID); wqi[i++] = 0; - xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_CHILD_LIST_DATA_4_RINGTAIL); for (j = 1; j < q->width; ++j) { struct xe_lrc *lrc = q->lrc[j]; @@ -767,50 +761,6 @@ static void wq_item_append(struct xe_exec_queue *q) parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); } -static int wq_items_rebase(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); - int i = q->guc->wqi_head; - - /* the ring starts after a header struct */ - iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, wq[0])); - - while ((i % WQ_SIZE) != (q->guc->wqi_tail % WQ_SIZE)) { - u32 len_dw, type, val; - - if (drm_WARN_ON_ONCE(&xe->drm, i < 0 || i > 2 * WQ_SIZE)) - break; - - val = xe_map_rd_ring_u32(xe, &map, i / sizeof(u32) + - XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN, - WQ_SIZE / sizeof(u32)); - len_dw = FIELD_GET(WQ_LEN_MASK, val); - type = FIELD_GET(WQ_TYPE_MASK, val); - - if (drm_WARN_ON_ONCE(&xe->drm, len_dw >= WQ_SIZE / sizeof(u32))) - break; - - if (type == WQ_TYPE_MULTI_LRC) { - val = xe_lrc_descriptor(q->lrc[0]); - xe_map_wr_ring_u32(xe, &map, i / sizeof(u32) + - XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW, - WQ_SIZE / sizeof(u32), val); - } else if (drm_WARN_ON_ONCE(&xe->drm, type != WQ_TYPE_NOOP)) { - break; - } - - i += (len_dw + 1) * sizeof(u32); - } - - if ((i % WQ_SIZE) != (q->guc->wqi_tail % 
WQ_SIZE)) { - xe_gt_err(q->gt, "Exec queue fixups incomplete - wqi parse failed\n"); - return -EBADMSG; - } - return 0; -} - #define RESUME_PENDING ~0x0ull static void submit_exec_queue(struct xe_exec_queue *q) { @@ -2669,10 +2619,6 @@ int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); if (err) break; - if (xe_exec_queue_is_parallel(q)) - err = wq_items_rebase(q); - if (err) - break; } mutex_unlock(&guc->submission_state.lock); -- cgit v1.2.3 From 6c640592e84cd04cd6c61bf7afd83808cc20c9f4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 3 Oct 2025 01:38:23 +0200 Subject: Revert "drm/xe/vf: Post migration, repopulate ring area for pending request" This reverts commit a0dda25d24e636df5c30a9370464b7cebc709faf. Due to change in the VF migration recovery design this code is not needed any more. v3: - Add commit message (Michal / Lucas) Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Signed-off-by: Michal Wajdeczko Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20251002233824.203417-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 24 ------------------------ drivers/gpu/drm/xe/xe_exec_queue.h | 3 +-- drivers/gpu/drm/xe/xe_guc_submit.c | 24 ------------------------ drivers/gpu/drm/xe/xe_guc_submit.h | 2 -- drivers/gpu/drm/xe/xe_sriov_vf.c | 1 - 5 files changed, 1 insertion(+), 53 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index db3f869b53f3..9a251abe85f9 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -1146,27 +1146,3 @@ int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch) return err; } - -/** - * xe_exec_queue_jobs_ring_restore - Re-emit ring commands of requests pending on given queue. - * @q: the &xe_exec_queue struct instance - */ -void xe_exec_queue_jobs_ring_restore(struct xe_exec_queue *q) -{ - struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_sched_job *job; - - /* - * This routine is used within VF migration recovery. This means - * using the lock here introduces a restriction: we cannot wait - * for any GFX HW response while the lock is taken. - */ - spin_lock(&sched->base.job_list_lock); - list_for_each_entry(job, &sched->base.pending_list, drm.list) { - if (xe_sched_job_is_error(job)) - continue; - - q->ring_ops->emit_job(job); - } - spin_unlock(&sched->base.job_list_lock); -} diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 15ec852e7f7e..8821ceb838d0 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -92,7 +92,6 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q); int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch); -void xe_exec_queue_jobs_ring_restore(struct xe_exec_queue *q); - struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q); + #endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 3ac0950f55be..16f78376f196 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -845,30 +845,6 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) return fence; } -/** - * xe_guc_jobs_ring_rebase - Re-emit ring commands of requests pending - * on all queues under a guc. 
- @guc: the &xe_guc struct instance - */ -void xe_guc_jobs_ring_rebase(struct xe_guc *guc) -{ - struct xe_exec_queue *q; - unsigned long index; - - /* - * This routine is used within VF migration recovery. This means - * using the lock here introduces a restriction: we cannot wait - * for any GFX HW response while the lock is taken. - */ - mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { - if (exec_queue_killed_or_banned_or_wedged(q)) - continue; - xe_exec_queue_jobs_ring_restore(q); - } - mutex_unlock(&guc->submission_state.lock); -} - static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 78c3f07e31a0..5b4a0a6fd818 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -36,8 +36,6 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len); -void xe_guc_jobs_ring_rebase(struct xe_guc *guc); - struct xe_guc_submit_exec_queue_snapshot * xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q); void diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index cdd9f8e78b2a..d59e2b55cc95 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -334,7 +334,6 @@ static int gt_vf_post_migration_fixups(struct xe_gt *gt) err = xe_guc_contexts_hwsp_rebase(&gt->uc.guc, buf); if (err) goto out; - xe_guc_jobs_ring_rebase(&gt->uc.guc); xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift); } -- cgit v1.2.3 From bdc2fb17ae9952bbce28d60ebab2520ed937363d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 3 Oct 2025 01:38:24 +0200 Subject: Revert "drm/xe/vf: Fixup CTB send buffer messages after migration" This reverts commit cef88d1265cac7d415606af73ba58926fd3cd8b7. Due to change in the VF migration recovery design this code is not needed any more.
v3: - Add commit message (Michal / Lucas) Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Signed-off-by: Michal Wajdeczko Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20251002233824.203417-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 183 --------------------------------------- drivers/gpu/drm/xe/xe_guc_ct.h | 2 - drivers/gpu/drm/xe/xe_map.h | 18 ---- drivers/gpu/drm/xe/xe_sriov_vf.c | 2 - 4 files changed, 205 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 18f6327bf552..47079ab9922c 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -25,7 +25,6 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_monitor.h" -#include "xe_gt_sriov_printk.h" #include "xe_guc.h" #include "xe_guc_log.h" #include "xe_guc_relay.h" @@ -93,8 +92,6 @@ struct g2h_fence { bool done; }; -#define make_u64(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) - static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) { memset(g2h_fence, 0, sizeof(*g2h_fence)); @@ -1793,186 +1790,6 @@ static void g2h_worker_func(struct work_struct *w) receive_g2h(ct); } -static void xe_fixup_u64_in_cmds(struct xe_device *xe, struct iosys_map *cmds, - u32 size, u32 idx, s64 shift) -{ - u32 hi, lo; - u64 offset; - - lo = xe_map_rd_ring_u32(xe, cmds, idx, size); - hi = xe_map_rd_ring_u32(xe, cmds, idx + 1, size); - offset = make_u64(hi, lo); - offset += shift; - lo = lower_32_bits(offset); - hi = upper_32_bits(offset); - xe_map_wr_ring_u32(xe, cmds, idx, size, lo); - xe_map_wr_ring_u32(xe, cmds, idx + 1, size, hi); -} - -/* - * Shift any GGTT addresses within a single message left within CTB from - * before post-migration recovery. 
- * @ct: pointer to CT struct of the target GuC - * @cmds: iomap buffer containing CT messages - * @head: start of the target message within the buffer - * @len: length of the target message - * @size: size of the commands buffer - * @shift: the address shift to be added to each GGTT reference - * Return: true if the message was fixed or needed no fixups, false on failure - */ -static bool ct_fixup_ggtt_in_message(struct xe_guc_ct *ct, - struct iosys_map *cmds, u32 head, - u32 len, u32 size, s64 shift) -{ - struct xe_gt *gt = ct_to_gt(ct); - struct xe_device *xe = ct_to_xe(ct); - u32 msg[GUC_HXG_MSG_MIN_LEN]; - u32 action, i, n; - - xe_gt_assert(gt, len >= GUC_HXG_MSG_MIN_LEN); - - msg[0] = xe_map_rd_ring_u32(xe, cmds, head, size); - action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]); - - xe_gt_sriov_dbg_verbose(gt, "fixing H2G %#x\n", action); - - switch (action) { - case XE_GUC_ACTION_REGISTER_CONTEXT: - if (len != XE_GUC_REGISTER_CONTEXT_MSG_LEN) - goto err_len; - xe_fixup_u64_in_cmds(xe, cmds, size, head + - XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER, - shift); - xe_fixup_u64_in_cmds(xe, cmds, size, head + - XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER, - shift); - xe_fixup_u64_in_cmds(xe, cmds, size, head + - XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR, shift); - break; - case XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC: - if (len < XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN) - goto err_len; - n = xe_map_rd_ring_u32(xe, cmds, head + - XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS, size); - if (len != XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN + 2 * n) - goto err_len; - xe_fixup_u64_in_cmds(xe, cmds, size, head + - XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER, - shift); - xe_fixup_u64_in_cmds(xe, cmds, size, head + - XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER, - shift); - for (i = 0; i < n; i++) - xe_fixup_u64_in_cmds(xe, cmds, size, head + - XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR - + 2 * i, shift); - break; - default: - break; - } - return true; - -err_len: - xe_gt_err(gt, "Skipped G2G %#x message fixups, unexpected length (%u)\n", action, len); - return false; -} - -/* - * Apply fixups to the next outgoing CT message within given CTB - * @ct: the &xe_guc_ct struct instance representing the target GuC - * @h2g: the &guc_ctb struct instance of the target buffer - * @shift: shift to be added to all GGTT addresses within the CTB - * @mhead: pointer to an integer storing message start position; the - * position is changed to next message before this function return - * @avail: size of the area available for parsing, that is length - * of all remaining messages stored within the CTB - * Return: size of the area available for parsing after one message - * has been parsed, that is length remaining from the updated mhead - */ -static int ct_fixup_ggtt_in_buffer(struct xe_guc_ct *ct, struct guc_ctb *h2g, - s64 shift, u32 *mhead, s32 avail) -{ - struct xe_gt *gt = ct_to_gt(ct); - struct xe_device *xe = ct_to_xe(ct); - u32 msg[GUC_HXG_MSG_MIN_LEN]; - u32 size = h2g->info.size; - u32 head = *mhead; - u32 len; - - xe_gt_assert(gt, avail >= (s32)GUC_CTB_MSG_MIN_LEN); - - /* Read header */ - msg[0] = xe_map_rd_ring_u32(xe, &h2g->cmds, head, size); - len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN; - - if (unlikely(len > (u32)avail)) { - xe_gt_err(gt, "H2G channel broken on read, avail=%d, len=%d, fixups skipped\n", - avail, len); - return 0; - } - - head = (head + GUC_CTB_MSG_MIN_LEN) % size; - if 
(!ct_fixup_ggtt_in_message(ct, &h2g->cmds, head, msg_len_to_hxg_len(len), size, shift)) - return 0; - *mhead = (head + msg_len_to_hxg_len(len)) % size; - - return avail - len; -} - -/** - * xe_guc_ct_fixup_messages_with_ggtt - Fixup any pending H2G CTB messages - * @ct: pointer to CT struct of the target GuC - * @ggtt_shift: shift to be added to all GGTT addresses within the CTB - * - * Messages in GuC to Host CTB are owned by GuC and any fixups in them - * are made by GuC. But content of the Host to GuC CTB is owned by the - * KMD, so fixups to GGTT references in any pending messages need to be - * applied here. - * This function updates GGTT offsets in payloads of pending H2G CTB - * messages (messages which were not consumed by GuC before the VF got - * paused). - */ -void xe_guc_ct_fixup_messages_with_ggtt(struct xe_guc_ct *ct, s64 ggtt_shift) -{ - struct guc_ctb *h2g = &ct->ctbs.h2g; - struct xe_guc *guc = ct_to_guc(ct); - struct xe_gt *gt = guc_to_gt(guc); - u32 head, tail, size; - s32 avail; - - if (unlikely(h2g->info.broken)) - return; - - h2g->info.head = desc_read(ct_to_xe(ct), h2g, head); - head = h2g->info.head; - tail = READ_ONCE(h2g->info.tail); - size = h2g->info.size; - - if (unlikely(head > size)) - goto corrupted; - - if (unlikely(tail >= size)) - goto corrupted; - - avail = tail - head; - - /* beware of buffer wrap case */ - if (unlikely(avail < 0)) - avail += size; - xe_gt_dbg(gt, "available %d (%u:%u:%u)\n", avail, head, tail, size); - xe_gt_assert(gt, avail >= 0); - - while (avail > 0) - avail = ct_fixup_ggtt_in_buffer(ct, h2g, ggtt_shift, &head, avail); - - return; - -corrupted: - xe_gt_err(gt, "Corrupted H2G descriptor head=%u tail=%u size=%u, fixups not applied\n", - head, tail, size); - h2g->info.broken = true; -} - static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic, bool want_ctb) { diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index cf41210ab30a..d6c81325a76c 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -24,8 +24,6 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_pr void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb); -void xe_guc_ct_fixup_messages_with_ggtt(struct xe_guc_ct *ct, s64 ggtt_shift); - static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct) { return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED; diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h index 8d67f6ba2d95..f62e0c8b67ab 100644 --- a/drivers/gpu/drm/xe/xe_map.h +++ b/drivers/gpu/drm/xe/xe_map.h @@ -78,24 +78,6 @@ static inline void xe_map_write32(struct xe_device *xe, struct iosys_map *map, iosys_map_wr(map__, offset__, type__, val__); \ }) -#define xe_map_rd_array(xe__, map__, index__, type__) \ - xe_map_rd(xe__, map__, (index__) * sizeof(type__), type__) - -#define xe_map_wr_array(xe__, map__, index__, type__, val__) \ - xe_map_wr(xe__, map__, (index__) * sizeof(type__), type__, val__) - -#define xe_map_rd_array_u32(xe__, map__, index__) \ - xe_map_rd_array(xe__, map__, index__, u32) - -#define xe_map_wr_array_u32(xe__, map__, index__, val__) \ - xe_map_wr_array(xe__, map__, index__, u32, val__) - -#define xe_map_rd_ring_u32(xe__, map__, index__, size__) \ - xe_map_rd_array_u32(xe__, map__, (index__) % (size__)) - -#define xe_map_wr_ring_u32(xe__, map__, index__, size__, val__) \ - xe_map_wr_array_u32(xe__, map__, (index__) % 
(size__), val__) - #define xe_map_rd_field(xe__, map__, struct_offset__, struct_type__, field__) ({ \ struct xe_device *__xe = xe__; \ xe_device_assert_mem_access(__xe); \ diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index d59e2b55cc95..c1830ec8f0fd 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -12,7 +12,6 @@ #include "xe_gt_sriov_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" -#include "xe_guc_ct.h" #include "xe_guc_submit.h" #include "xe_irq.h" #include "xe_lrc.h" @@ -334,7 +333,6 @@ static int gt_vf_post_migration_fixups(struct xe_gt *gt) err = xe_guc_contexts_hwsp_rebase(&gt->uc.guc, buf); if (err) goto out; - xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift); } out: -- cgit v1.2.3 From db7dde99049f04b99cbf518be3a189b3e9f5c1d8 Mon Sep 17 00:00:00 2001 From: Piotr Piórkowski Date: Fri, 3 Oct 2025 18:26:15 +0200 Subject: drm/xe: Add initial support for separate kernel VRAM region on the tile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far, kernel and userspace allocations have shared the same VRAM region. However, in some scenarios, it may be necessary to reserve a separate VRAM area exclusively for kernel allocations. Let's add preliminary support for such a configuration. v2: - replaced for_each_bo_flag_vram with the improved for_each_set_bo_vram_flag helper (Matthew) - moved the VRAM flag iteration macro definition into xe_bo.c (Matthew) - drop unused bo_flags from bo_vram_flags_to_vram_placement (Matthew) - use hweight32 helper in __xe_bo_fixed_placement for readability (Matthew) v3: remove unnecessary VRAM fixup id Signed-off-by: Piotr Piórkowski Cc: Matthew Auld Reviewed-by: Matthew Auld Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251003162619.1984236-2-piotr.piorkowski@intel.com --- drivers/gpu/drm/xe/xe_bo.c | 78 ++++++++++++++++++++++++------------ drivers/gpu/drm/xe/xe_bo.h | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 10 ++++- drivers/gpu/drm/xe/xe_tile.c | 8 ++++ drivers/gpu/drm/xe/xe_vram.c | 6 ++- 5 files changed, 75 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 4410e28dee54..ce20f58ee80e 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -34,6 +34,7 @@ #include "xe_res_cursor.h" #include "xe_shrinker.h" #include "xe_sriov_vf_ccs.h" +#include "xe_tile.h" #include "xe_trace_bo.h" #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" @@ -81,6 +82,10 @@ static struct ttm_placement tt_placement = { .placement = tt_placement_flags, }; +#define for_each_set_bo_vram_flag(bit__, bo_flags__) \ + for (unsigned int __bit_tmp = BIT(0); __bit_tmp <= XE_BO_FLAG_VRAM_MASK; __bit_tmp <<= 1) \ + for_each_if(((bit__) = __bit_tmp) & (bo_flags__) & XE_BO_FLAG_VRAM_MASK) + bool mem_type_is_vram(u32 mem_type) { return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN; @@ -213,6 +218,27 @@ static bool force_contiguous(u32 bo_flags) bo_flags & XE_BO_FLAG_PINNED; } +static u8 vram_bo_flag_to_tile_id(struct xe_device *xe, u32 vram_bo_flag) +{ + xe_assert(xe, vram_bo_flag & XE_BO_FLAG_VRAM_MASK); + xe_assert(xe, (vram_bo_flag & (vram_bo_flag - 1)) == 0); + + return __ffs(vram_bo_flag >> (__ffs(XE_BO_FLAG_VRAM0) - 1)) - 1; +} + +static u32 bo_vram_flags_to_vram_placement(struct xe_device *xe, u32 vram_flag, + enum ttm_bo_type type) +{ + u8 tile_id = vram_bo_flag_to_tile_id(xe, vram_flag); + + xe_assert(xe, tile_id < xe->info.tile_count); + + if (type ==
ttm_bo_type_kernel) + return xe->tiles[tile_id].mem.kernel_vram->placement; + else + return xe->tiles[tile_id].mem.vram->placement; +} + static void add_vram(struct xe_device *xe, struct xe_bo *bo, struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c) { @@ -245,12 +271,15 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, } static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, - u32 bo_flags, u32 *c) + u32 bo_flags, enum ttm_bo_type type, u32 *c) { - if (bo_flags & XE_BO_FLAG_VRAM0) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); - if (bo_flags & XE_BO_FLAG_VRAM1) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); + u32 vram_flag; + + for_each_set_bo_vram_flag(vram_flag, bo_flags) { + u32 pl = bo_vram_flags_to_vram_placement(xe, vram_flag, type); + + add_vram(xe, bo, bo->placements, bo_flags, pl, c); + } } static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, @@ -269,11 +298,11 @@ static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, } static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, - u32 bo_flags) + u32 bo_flags, enum ttm_bo_type type) { u32 c = 0; - try_add_vram(xe, bo, bo_flags, &c); + try_add_vram(xe, bo, bo_flags, type, &c); try_add_system(xe, bo, bo_flags, &c); try_add_stolen(xe, bo, bo_flags, &c); @@ -289,10 +318,10 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, } int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, - u32 bo_flags) + u32 bo_flags, enum ttm_bo_type type) { xe_bo_assert_held(bo); - return __xe_bo_placement_for_flags(xe, bo, bo_flags); + return __xe_bo_placement_for_flags(xe, bo, bo_flags, type); } static void xe_evict_flags(struct ttm_buffer_object *tbo, @@ -2164,7 +2193,7 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, xe_validation_assert_exec(xe, exec, &bo->ttm.base); if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) { - err = __xe_bo_placement_for_flags(xe, bo, bo->flags); + err = __xe_bo_placement_for_flags(xe, bo, bo->flags, type); if (WARN_ON(err)) { xe_ttm_bo_destroy(&bo->ttm); return ERR_PTR(err); @@ -2222,34 +2251,31 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, } static int __xe_bo_fixed_placement(struct xe_device *xe, - struct xe_bo *bo, + struct xe_bo *bo, enum ttm_bo_type type, u32 flags, u64 start, u64 end, u64 size) { struct ttm_place *place = bo->placements; + u32 vram_flag, vram_stolen_flags; if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM)) return -EINVAL; + vram_flag = flags & XE_BO_FLAG_VRAM_MASK; + vram_stolen_flags = (flags & (XE_BO_FLAG_STOLEN)) | vram_flag; + + /* check if more than one VRAM/STOLEN flag is set */ + if (hweight32(vram_stolen_flags) > 1) + return -EINVAL; + place->flags = TTM_PL_FLAG_CONTIGUOUS; place->fpfn = start >> PAGE_SHIFT; place->lpfn = end >> PAGE_SHIFT; - switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) { - case XE_BO_FLAG_VRAM0: - place->mem_type = XE_PL_VRAM0; - break; - case XE_BO_FLAG_VRAM1: - place->mem_type = XE_PL_VRAM1; - break; - case XE_BO_FLAG_STOLEN: + if (flags & XE_BO_FLAG_STOLEN) place->mem_type = XE_PL_STOLEN; - break; - - default: - /* 0 or multiple of the above set */ - return -EINVAL; - } + else + place->mem_type = bo_vram_flags_to_vram_placement(xe, vram_flag, type); bo->placement = (struct ttm_placement) { .num_placement = 1, @@ -2278,7 +2304,7 @@ __xe_bo_create_locked(struct xe_device *xe, return bo; flags |= XE_BO_FLAG_FIXED_PLACEMENT; - err = __xe_bo_fixed_placement(xe, bo, 
flags, start, end, size); + err = __xe_bo_fixed_placement(xe, bo, type, flags, start, end, size); if (err) { xe_bo_free(bo); return ERR_PTR(err); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index a77af42b5f9e..9a8401300b15 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -122,7 +122,7 @@ struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_til int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src); int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, - u32 bo_flags); + u32 bo_flags, enum ttm_bo_type type); static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo) { diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 74d7af830b85..989244418950 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -158,7 +158,15 @@ struct xe_tile { /** @mem: memory management info for tile */ struct { /** - * @mem.vram: VRAM info for tile. + * @mem.kernel_vram: kernel-dedicated VRAM info for tile. + * + * Although VRAM is associated with a specific tile, it can + * still be accessed by all tiles' GTs. + */ + struct xe_vram_region *kernel_vram; + + /** + * @mem.vram: general purpose VRAM info for tile. * * Although VRAM is associated with a specific tile, it can * still be accessed by all tiles' GTs. diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index d49ba3401963..6edb5062c1da 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -124,6 +124,14 @@ int xe_tile_alloc_vram(struct xe_tile *tile) return -ENOMEM; tile->mem.vram = vram; + /* + * If the kernel_vram is not already allocated, + * it means that tile has common VRAM region for + * kernel and user space. + */ + if (!tile->mem.kernel_vram) + tile->mem.kernel_vram = tile->mem.vram; + return 0; } diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index b44ebf50fedb..7adfccf68e4c 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -13,6 +13,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_assert.h" +#include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt_mcr.h" @@ -283,8 +284,11 @@ static void vram_fini(void *arg) xe->mem.vram->mapping = NULL; - for_each_tile(tile, xe, id) + for_each_tile(tile, xe, id) { tile->mem.vram->mapping = NULL; + if (tile->mem.kernel_vram) + tile->mem.kernel_vram->mapping = NULL; + } } struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement) -- cgit v1.2.3 From 9d290ab0b5a054cf5bb39a80b90140fa22704f61 Mon Sep 17 00:00:00 2001 From: Piotr Piórkowski Date: Fri, 3 Oct 2025 18:26:16 +0200 Subject: drm/xe: Introduce new BO flag XE_BO_FLAG_FORCE_USER_VRAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using a separate VRAM region for kernel allocations, some kernel structures, such as context userspace data, should not reside in the VRAM region dedicated to the kernel. The VRAM kernel region is intended only for allocations necessary for driver operation. Allocations created via ioctl are long-lived and not easily evictable. If this region runs out of space, there may not be a fallback, which could cause failures. 
To prevent this, add a new BO flag that explicitly forces the BO to be allocated in the general-purpose VRAM region accessible to userspace, avoiding the kernel-only VRAM region. v2: - update commit message (Matthew) Signed-off-by: Piotr Piórkowski Cc: Matthew Auld Reviewed-by: Matthew Auld Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251003162619.1984236-3-piotr.piorkowski@intel.com --- drivers/gpu/drm/xe/xe_bo.c | 8 ++++---- drivers/gpu/drm/xe/xe_bo.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index ce20f58ee80e..a1302afb31b9 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -226,14 +226,14 @@ static u8 vram_bo_flag_to_tile_id(struct xe_device *xe, u32 vram_bo_flag) return __ffs(vram_bo_flag >> (__ffs(XE_BO_FLAG_VRAM0) - 1)) - 1; } -static u32 bo_vram_flags_to_vram_placement(struct xe_device *xe, u32 vram_flag, +static u32 bo_vram_flags_to_vram_placement(struct xe_device *xe, u32 bo_flags, u32 vram_flag, enum ttm_bo_type type) { u8 tile_id = vram_bo_flag_to_tile_id(xe, vram_flag); xe_assert(xe, tile_id < xe->info.tile_count); - if (type == ttm_bo_type_kernel) + if (type == ttm_bo_type_kernel && !(bo_flags & XE_BO_FLAG_FORCE_USER_VRAM)) return xe->tiles[tile_id].mem.kernel_vram->placement; else return xe->tiles[tile_id].mem.vram->placement; @@ -276,7 +276,7 @@ static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, u32 vram_flag; for_each_set_bo_vram_flag(vram_flag, bo_flags) { - u32 pl = bo_vram_flags_to_vram_placement(xe, vram_flag, type); + u32 pl = bo_vram_flags_to_vram_placement(xe, bo_flags, vram_flag, type); add_vram(xe, bo, bo->placements, bo_flags, pl, c); } @@ -2275,7 +2275,7 @@ static int __xe_bo_fixed_placement(struct xe_device *xe, if (flags & XE_BO_FLAG_STOLEN) place->mem_type = XE_PL_STOLEN; else - place->mem_type = bo_vram_flags_to_vram_placement(xe, vram_flag, type); + place->mem_type = bo_vram_flags_to_vram_placement(xe, flags, vram_flag, type); bo->placement = (struct ttm_placement) { .num_placement = 1, diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 9a8401300b15..353d607d301d 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -49,6 +49,7 @@ #define XE_BO_FLAG_GGTT2 BIT(22) #define XE_BO_FLAG_GGTT3 BIT(23) #define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(24) +#define XE_BO_FLAG_FORCE_USER_VRAM BIT(25) /* this one is trigger internally only */ #define XE_BO_FLAG_INTERNAL_TEST BIT(30) -- cgit v1.2.3 From 3f6cd669d50b67c2a33642c811a3a6645f0334e8 Mon Sep 17 00:00:00 2001 From: Piotr Piórkowski Date: Fri, 3 Oct 2025 18:26:17 +0200 Subject: drm/xe: Force user context allocations in user VRAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In general, kernel structures should be allocated in the kernel-dedicated VRAM region. However, userspace context data - while used by the kernel - does not need to reside there. Let's force the allocation of such data in the general-purpose VRAM region accessible to userspace. 
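The plumbing in the hunks below reduces to this chain (condensed from the diff):

	/* exec queue not created by the kernel: mark the LRC as a user context */
	if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL))
		flags |= XE_LRC_CREATE_USER_CTX;

	/* in xe_lrc_init(): keep the LRC BO out of the kernel-only VRAM region */
	if (init_flags & XE_LRC_CREATE_USER_CTX)
		bo_flags |= XE_BO_FLAG_FORCE_USER_VRAM;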
Signed-off-by: Piotr Piórkowski Cc: Matthew Auld Reviewed-by: Matthew Auld Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251003162619.1984236-4-piotr.piorkowski@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 7 +++++-- drivers/gpu/drm/xe/xe_lrc.c | 3 +++ drivers/gpu/drm/xe/xe_lrc.h | 6 ++++-- drivers/gpu/drm/xe/xe_pt.c | 2 +- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 9a251abe85f9..df82463b19f6 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -183,7 +183,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, return q; } -static int __xe_exec_queue_init(struct xe_exec_queue *q) +static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) { int i, err; u32 flags = 0; @@ -202,6 +202,9 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q) flags |= XE_LRC_CREATE_RUNALONE; } + if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL)) + flags |= XE_LRC_CREATE_USER_CTX; + for (i = 0; i < q->width; ++i) { q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags); if (IS_ERR(q->lrc[i])) { @@ -248,7 +251,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (IS_ERR(q)) return q; - err = __xe_exec_queue_init(q); + err = __xe_exec_queue_init(q, flags); if (err) goto err_post_alloc; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 47e9df775072..c88e90175f59 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1415,6 +1415,9 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, if (vm && vm->xef) /* userspace */ bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; + if (init_flags & XE_LRC_CREATE_USER_CTX) + bo_flags |= XE_BO_FLAG_FORCE_USER_VRAM; + lrc->bo = xe_bo_create_pin_map_novm(xe, tile, bo_size, ttm_bo_type_kernel, diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 188565465779..21a3daab0154 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -44,8 +44,10 @@ struct xe_lrc_snapshot { #define LRC_WA_BB_SIZE SZ_4K -#define XE_LRC_CREATE_RUNALONE 0x1 -#define XE_LRC_CREATE_PXP 0x2 +#define XE_LRC_CREATE_RUNALONE BIT(0) +#define XE_LRC_CREATE_PXP BIT(1) +#define XE_LRC_CREATE_USER_CTX BIT(2) + struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, u32 ring_size, u16 msix_vec, u32 flags); void xe_lrc_destroy(struct kref *ref); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index a1c88f9a6c76..e39da95f3a05 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -122,7 +122,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | XE_BO_FLAG_NO_RESV_EVICT | XE_BO_FLAG_PAGETABLE; if (vm->xef) /* userspace */ - bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; + bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM; pt->level = level; -- cgit v1.2.3 From b48140f4465090b5b21110a9df9a9aa546c69152 Mon Sep 17 00:00:00 2001 From: Piotr Piórkowski Date: Fri, 3 Oct 2025 18:26:18 +0200 Subject: drm/xe/pf: Force use user VRAM for LMEM provisioning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LMEM assigned to VFs should be allocated from the general-purpose VRAM pool, not from the kernel-reserved region. Let's force the use of general-purpose VRAM for BOs intended for VFs. 
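With XE_BO_FLAG_FORCE_USER_VRAM set, the placement helper introduced earlier in this series resolves the VF LMEM object to the general-purpose pool; condensed from that patch:

	if (type == ttm_bo_type_kernel && !(bo_flags & XE_BO_FLAG_FORCE_USER_VRAM))
		placement = xe->tiles[tile_id].mem.kernel_vram->placement; /* kernel-reserved */
	else
		placement = xe->tiles[tile_id].mem.vram->placement; /* general purpose */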
Signed-off-by: Piotr Piórkowski Reviewed-by: Matthew Auld Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251003162619.1984236-5-piotr.piorkowski@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 6344b5205c08..b2e5c52978e6 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1484,7 +1484,8 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_NEEDS_2M | XE_BO_FLAG_PINNED | - XE_BO_FLAG_PINNED_LATE_RESTORE); + XE_BO_FLAG_PINNED_LATE_RESTORE | + XE_BO_FLAG_FORCE_USER_VRAM); if (IS_ERR(bo)) return PTR_ERR(bo); -- cgit v1.2.3 From 8462d16d1b75325cb7220cd2731a47033baff863 Mon Sep 17 00:00:00 2001 From: Piotr Piórkowski Date: Fri, 3 Oct 2025 18:26:19 +0200 Subject: drm/xe: Combine userspace context check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both vm->xef and XE_LRC_CREATE_USER_CTX indicate in xe_lrc_init that the context originates from userspace. However, XE_LRC_CREATE_USER_CTX has a broader scope as it may be set even when no vm->xef is present. The XE_BO_FLAG_PINNED_LATE_RESTORE flag can be extended to both cases, so there is no point in handling the two cases separately. Let's combine vm->xef and XE_LRC_CREATE_USER_CTX checks to detect userspace context. Signed-off-by: Piotr Piórkowski Suggested-by: Matthew Auld Reviewed-by: Matthew Auld Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251003162619.1984236-6-piotr.piorkowski@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index c88e90175f59..af09f70f6e78 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1412,11 +1412,9 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE; - if (vm && vm->xef) /* userspace */ - bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; - if (init_flags & XE_LRC_CREATE_USER_CTX) - bo_flags |= XE_BO_FLAG_FORCE_USER_VRAM; + if ((vm && vm->xef) || init_flags & XE_LRC_CREATE_USER_CTX) /* userspace */ + bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM; lrc->bo = xe_bo_create_pin_map_novm(xe, tile, bo_size, -- cgit v1.2.3 From a908de69ceaf08192dc1732f07a147c10b9be3e0 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Sat, 4 Oct 2025 00:03:31 +0000 Subject: drm/xe: Fix copyright and function naming in xe_ttm_vram_mgr - Correct copyright year from "2002" to "2022". - Rename ttm_vram_mgr_fini() to xe_ttm_vram_mgr_fini() to avoid confusion with generic TTM helpers. No functional changes intended. 
Signed-off-by: Shuicheng Lin Reviewed-by: Nitin Gote Link: https://lore.kernel.org/r/20251004000425.2489291-2-shuicheng.lin@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 9175b4a2214b..9f70802fce92 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: MIT /* * Copyright © 2021-2022 Intel Corporation - * Copyright (C) 2021-2002 Red Hat + * Copyright (C) 2021-2022 Red Hat */ #include @@ -284,7 +284,7 @@ static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = { .debug = xe_ttm_vram_mgr_debug }; -static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg) +static void xe_ttm_vram_mgr_fini(struct drm_device *dev, void *arg) { struct xe_device *xe = to_xe_device(dev); struct xe_ttm_vram_mgr *mgr = arg; @@ -335,7 +335,7 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, ttm_set_driver_manager(&xe->ttm, mem_type, &mgr->manager); ttm_resource_manager_set_used(&mgr->manager, true); - return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr); + return drmm_add_action_or_reset(&xe->drm, xe_ttm_vram_mgr_fini, mgr); } /** -- cgit v1.2.3 From 869580c415c975ac293a869a3e560e009a07e7c7 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 2 Oct 2025 19:13:08 +0200 Subject: drm/xe: Don't force DRM_XE_DEBUG_MEMIRQ for SR-IOV debug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For pure SR-IOV debugging there is no need to select already separated config for the debugging of the memory based interrupts, as the latter is also very noisy on its own. Change config order and use a weak reverse dependency instead. Signed-off-by: Michal Wajdeczko Reviewed-by: Jonathan Cavitt Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20251002171308.203127-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Kconfig.debug | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 87902b4bd6d3..01227c77f6d7 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -40,23 +40,23 @@ config DRM_XE_DEBUG_VM If in doubt, say "N". -config DRM_XE_DEBUG_MEMIRQ - bool "Enable extra memirq debugging" +config DRM_XE_DEBUG_SRIOV + bool "Enable extra SR-IOV debugging" default n + imply DRM_XE_DEBUG_MEMIRQ help - Choose this option to enable additional debugging info for - memory based interrupts. + Enable extra SR-IOV debugging info. Recommended for driver developers only. If in doubt, say "N". -config DRM_XE_DEBUG_SRIOV - bool "Enable extra SR-IOV debugging" +config DRM_XE_DEBUG_MEMIRQ + bool "Enable extra memirq debugging" default n - select DRM_XE_DEBUG_MEMIRQ help - Enable extra SR-IOV debugging info. + Choose this option to enable additional debugging info for + memory based interrupts. Recommended for driver developers only. -- cgit v1.2.3 From 71f1939e0d9e6702c0424c31521b72b7f025e6dd Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 2 Oct 2025 21:27:36 +0200 Subject: drm/xe/xe_late_bind_fw: Fix and simplify parsing user input Code was wrongly passing sizeof(uval) as the number base to use, and unlike other debugfs entries that represent bool data, it wasn't using the dedicated function to parse user input as bool. 
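To make the bug concrete: the third parameter of kstrtouint_from_user() is the numeric base, not a buffer size, so passing sizeof(uval) == 4 silently requested base-4 parsing of the user input. A minimal sketch of the fixed pattern (hypothetical handler, not the patched code itself):

    #include <linux/fs.h>
    #include <linux/kstrtox.h>

    /* Buggy form, for reference -- sizeof(uval) == 4 lands in the "base"
     * slot: kstrtouint_from_user(ubuf, size, sizeof(uval), &uval);
     */
    static ssize_t example_bool_set(struct file *f, const char __user *ubuf,
                                    size_t size, loff_t *pos)
    {
            bool val;
            int ret;

            /* Accepts "0"/"1", "y"/"n", "on"/"off" and friends. */
            ret = kstrtobool_from_user(ubuf, size, &val);
            if (ret)
                    return ret;

            return size;
    }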
Signed-off-by: Michal Wajdeczko Cc: Badal Nilawar Cc: Rodrigo Vivi Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20251002192736.203186-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_debugfs.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index dba0a9c4a4d2..34f622bef3e0 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -349,17 +349,14 @@ static ssize_t disable_late_binding_set(struct file *f, const char __user *ubuf, { struct xe_device *xe = file_inode(f)->i_private; struct xe_late_bind *late_bind = &xe->late_bind; - u32 uval; - ssize_t ret; + bool val; + int ret; - ret = kstrtouint_from_user(ubuf, size, sizeof(uval), &uval); + ret = kstrtobool_from_user(ubuf, size, &val); if (ret) return ret; - if (uval > 1) - return -EINVAL; - - late_bind->disable = !!uval; + late_bind->disable = val; return size; } -- cgit v1.2.3 From 9a54b5127f0845a414ba475dc941035a4c589e59 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sat, 4 Oct 2025 18:20:08 +0200 Subject: drm/xe/pf: Make the late-initialization really late MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While the late PF per-GT initialization is done quite late in the single GT initialization flow, in case of multi-GT platforms, it may still be done before other GT early initialization. That leads to some issues during unwind, when there are cross-GT dependencies, like resource cleanup that is shared by both GTs, but the other GT may already be sanitized or disabled. The following errors could be observed when trying to unload the PF driver with some LMEM/VRAM already provisioned for few VFs: [ ] xe 0000:03:00.0: DEVRES REL ffff88814708f240 fini_config (16 bytes) [ ] xe 0000:03:00.0: [drm:lmtt_write_pte [xe]] PF: LMTT: WRITE level=2 index=1 pte=0x0 [ ] xe 0000:03:00.0: [drm:lmtt_invalidate_hw [xe]] PF: LMTT: num_fences=2 err=-19 [ ] xe 0000:03:00.0: [drm:lmtt_pt_free [xe]] PF: LMTT: level=0 addr=53a470000 [ ] xe 0000:03:00.0: [drm:lmtt_pt_free [xe]] PF: LMTT: level=1 addr=53a4b0000 [ ] xe 0000:03:00.0: [drm:lmtt_invalidate_hw [xe]] PF: LMTT: num_fences=2 err=-19 [ ] xe 0000:03:00.0: [drm] PF: LMTT0 invalidation failed (-ENODEV) [ ] xe 0000:03:00.0: [drm:lmtt_write_pte [xe]] PF: LMTT: WRITE level=2 index=2 pte=0x0 [ ] xe 0000:03:00.0: [drm:lmtt_invalidate_hw [xe]] PF: LMTT: num_fences=2 err=-19 [ ] xe 0000:03:00.0: [drm:lmtt_pt_free [xe]] PF: LMTT: level=0 addr=539b70000 [ ] xe 0000:03:00.0: [drm:lmtt_pt_free [xe]] PF: LMTT: level=1 addr=539bf0000 [ ] xe 0000:03:00.0: [drm:lmtt_invalidate_hw [xe]] PF: LMTT: num_fences=2 err=-19 [ ] xe 0000:03:00.0: [drm] PF: LMTT0 invalidation failed (-ENODEV) Move all PF per-GT late initialization to the already defined late SR-IOV initialization function to allow proper order of the cleanup actions. While around, format all PF function stubs as one-liners, like many other stubs are defined in the Xe driver. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Jonathan Cavitt Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20251004162008.1782-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 4 +--- drivers/gpu/drm/xe/xe_sriov.c | 2 ++ drivers/gpu/drm/xe/xe_sriov_pf.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf.h | 13 ++++--------- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index b77572a19548..8fc3a6929d6c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -573,10 +573,8 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); - if (IS_SRIOV_PF(gt_to_xe(gt))) { - xe_gt_sriov_pf_init(gt); + if (IS_SRIOV_PF(gt_to_xe(gt))) xe_gt_sriov_pf_init_hw(gt); - } xe_force_wake_put(gt_to_fw(gt), fw_ref); diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index 7d2d6de2aabf..ea411944609b 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -167,6 +167,8 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t size) */ int xe_sriov_init_late(struct xe_device *xe) { + if (IS_SRIOV_PF(xe)) + return xe_sriov_pf_init_late(xe); if (IS_SRIOV_VF(xe)) return xe_sriov_vf_init_late(xe); diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 4698348c010a..2814f29f1093 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -103,6 +103,31 @@ int xe_sriov_pf_init_early(struct xe_device *xe) return 0; } +/** + * xe_sriov_pf_init_late() - Late initialization of the SR-IOV PF. + * @xe: the &xe_device to initialize + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_init_late(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + int err; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_init(gt); + if (err) + return err; + } + + return 0; +} + /** * xe_sriov_pf_wait_ready() - Wait until PF is ready to operate. 
* @xe: the &xe_device to test diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h index e6cf930ecde4..cba3fde9581f 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -15,18 +15,13 @@ struct xe_device; #ifdef CONFIG_PCI_IOV bool xe_sriov_pf_readiness(struct xe_device *xe); int xe_sriov_pf_init_early(struct xe_device *xe); +int xe_sriov_pf_init_late(struct xe_device *xe); int xe_sriov_pf_wait_ready(struct xe_device *xe); void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); #else -static inline bool xe_sriov_pf_readiness(struct xe_device *xe) -{ - return false; -} - -static inline int xe_sriov_pf_init_early(struct xe_device *xe) -{ - return 0; -} +static inline bool xe_sriov_pf_readiness(struct xe_device *xe) { return false; } +static inline int xe_sriov_pf_init_early(struct xe_device *xe) { return 0; } +static inline int xe_sriov_pf_init_late(struct xe_device *xe) { return 0; } #endif #endif -- cgit v1.2.3 From c95f18020799a824b5d1b8b5d95390293f0c2912 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 5 Oct 2025 15:36:37 +0200 Subject: drm/xe: Update SRIOV printk macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently we introduced xe-based printk macros; use them instead of the plain drm-based ones. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20251005133641.2651-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov_printk.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sriov_printk.h b/drivers/gpu/drm/xe/xe_sriov_printk.h index 117e1d541692..4c6b5c3d2190 100644 --- a/drivers/gpu/drm/xe/xe_sriov_printk.h +++ b/drivers/gpu/drm/xe/xe_sriov_printk.h @@ -1,22 +1,22 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright © 2023 Intel Corporation + * Copyright © 2023-2025 Intel Corporation */ #ifndef _XE_SRIOV_PRINTK_H_ #define _XE_SRIOV_PRINTK_H_ -#include - -#include "xe_device_types.h" -#include "xe_sriov_types.h" +#include "xe_printk.h" #define xe_sriov_printk_prefix(xe) \ ((xe)->sriov.__mode == XE_SRIOV_MODE_PF ? "PF: " : \ (xe)->sriov.__mode == XE_SRIOV_MODE_VF ? "VF: " : "") +#define __XE_SRIOV_PRINTK_FMT(_xe, _fmt, _args...) \ + "%s" _fmt, xe_sriov_printk_prefix(_xe), ##_args + #define xe_sriov_printk(xe, _level, fmt, ...) \ - drm_##_level(&(xe)->drm, "%s" fmt, xe_sriov_printk_prefix(xe), ##__VA_ARGS__) + xe_##_level((xe), __XE_SRIOV_PRINTK_FMT((xe), fmt, ##__VA_ARGS__)) #define xe_sriov_err(xe, fmt, ...) \ xe_sriov_printk((xe), err, fmt, ##__VA_ARGS__) -- cgit v1.2.3 From c66e4b6cae1f9cda645ed82e77c35d442cef400d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 5 Oct 2025 15:36:38 +0200 Subject: drm/xe: Add tile-based SRIOV printk macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have device and GT level SR-IOV specific macros, but unlike the native case, we don't yet have tile-based ones. Add macros to match the native use case and also update GT-based macros to rely on those new tile-based SR-IOV macros. This will slightly rearrange the output of the GT logs and instead: [...] Tile0: GT0: PF: pushed VF1 config with 2 KLVs... we might see: [...] PF: Tile0: GT0: pushed VF1 config with 2 KLVs... but that's even better.
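The trick that makes this layering work, for reference: each *_PRINTK_FMT macro expands to a replacement format string plus the extra prefix argument, so the prefixes compose without any caller changes. A standalone sketch of the same pattern (hypothetical names; plain C with the GNU ##__VA_ARGS__ extension the kernel uses):

    #include <stdio.h>

    #define PREFIX(pf)             ((pf) ? "PF: " : "")
    /* Expands to a new format string *and* a leading argument. */
    #define LOG_FMT(pf, fmt, ...)  "%s" fmt, PREFIX(pf), ##__VA_ARGS__
    #define log_info(pf, fmt, ...) printf(LOG_FMT((pf), fmt, ##__VA_ARGS__))

    int main(void)
    {
            /* Prints: PF: Tile0: GT0: pushed VF1 config with 2 KLVs */
            log_info(1, "Tile%u: GT%u: pushed VF%u config with %u KLVs\n",
                     0, 0, 1, 2);
            return 0;
    }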
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20251005133641.2651-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_printk.h | 7 +++++-- drivers/gpu/drm/xe/xe_tile_sriov_printk.h | 33 +++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_tile_sriov_printk.h diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_printk.h b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h index 17624b16300a..d3457d608db8 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_printk.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h @@ -7,10 +7,13 @@ #define _XE_GT_SRIOV_PRINTK_H_ #include "xe_gt_printk.h" -#include "xe_sriov_printk.h" +#include "xe_tile_sriov_printk.h" + +#define __XE_GT_SRIOV_PRINTK_FMT(_gt, _fmt, ...) \ + __XE_TILE_SRIOV_PRINTK_FMT((_gt)->tile, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) #define __xe_gt_sriov_printk(gt, _level, fmt, ...) \ - xe_gt_printk((gt), _level, "%s" fmt, xe_sriov_printk_prefix(gt_to_xe(gt)), ##__VA_ARGS__) + xe_sriov_##_level(gt_to_xe(gt), __XE_GT_SRIOV_PRINTK_FMT((gt), fmt, ##__VA_ARGS__)) #define xe_gt_sriov_err(_gt, _fmt, ...) \ __xe_gt_sriov_printk(_gt, err, _fmt, ##__VA_ARGS__) diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_printk.h b/drivers/gpu/drm/xe/xe_tile_sriov_printk.h new file mode 100644 index 000000000000..68323512872c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_printk.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TILE_SRIOV_PRINTK_H_ +#define _XE_TILE_SRIOV_PRINTK_H_ + +#include "xe_tile_printk.h" +#include "xe_sriov_printk.h" + +#define __XE_TILE_SRIOV_PRINTK_FMT(_tile, _fmt, ...) \ + __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__) + +#define xe_tile_sriov_printk(_tile, _level, _fmt, ...) \ + xe_sriov_##_level((_tile)->xe, __XE_TILE_SRIOV_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) + +#define xe_tile_sriov_err(_tile, _fmt, ...) \ + xe_tile_sriov_printk(_tile, err, _fmt, ##__VA_ARGS__) + +#define xe_tile_sriov_notice(_tile, _fmt, ...) \ + xe_tile_sriov_printk(_tile, notice, _fmt, ##__VA_ARGS__) + +#define xe_tile_sriov_info(_tile, _fmt, ...) \ + xe_tile_sriov_printk(_tile, info, _fmt, ##__VA_ARGS__) + +#define xe_tile_sriov_dbg(_tile, _fmt, ...) \ + xe_tile_sriov_printk(_tile, dbg, _fmt, ##__VA_ARGS__) + +#define xe_tile_sriov_dbg_verbose(_tile, _fmt, ...) \ + xe_tile_sriov_printk(_tile, dbg_verbose, _fmt, ##__VA_ARGS__) + +#endif -- cgit v1.2.3 From cd11babcd03a7d06b0ffffa34a318e6350a8f629 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 5 Oct 2025 15:36:39 +0200 Subject: drm/xe/pf: Update LMTT to use tile-based messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since now we have tile-based SR-IOV printk macros, there is no need to manually prepare the LMTT specific warning message (that is now upgraded to proper error level message) nor to use generic debug message without tile/LMTT identification. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20251005133641.2651-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_lmtt.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 62fc5a1a332d..4dc1de482eee 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -17,7 +17,7 @@ #include "xe_mmio.h" #include "xe_res_cursor.h" #include "xe_sriov.h" -#include "xe_sriov_printk.h" +#include "xe_tile_sriov_printk.h" /** * DOC: Local Memory Translation Table @@ -32,7 +32,7 @@ */ #define lmtt_assert(lmtt, condition) xe_tile_assert(lmtt_to_tile(lmtt), condition) -#define lmtt_debug(lmtt, msg...) xe_sriov_dbg_verbose(lmtt_to_xe(lmtt), "LMTT: " msg) +#define lmtt_debug(lmtt, msg...) xe_tile_sriov_dbg_verbose(lmtt_to_tile(lmtt), "LMTT: " msg) static bool xe_has_multi_level_lmtt(struct xe_device *xe) { @@ -267,15 +267,14 @@ static int lmtt_invalidate_hw(struct xe_lmtt *lmtt) */ void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt) { - struct xe_device *xe = lmtt_to_xe(lmtt); int err; - lmtt_assert(lmtt, IS_SRIOV_PF(xe)); + lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt))); err = lmtt_invalidate_hw(lmtt); if (err) - xe_sriov_warn(xe, "LMTT%u invalidation failed (%pe)", - lmtt_to_tile(lmtt)->id, ERR_PTR(err)); + xe_tile_sriov_err(lmtt_to_tile(lmtt), "LMTT invalidation failed (%pe)", + ERR_PTR(err)); } static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, -- cgit v1.2.3 From 430d3288776f790a05da02b81b53c8dec231ecd6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 5 Oct 2025 15:36:40 +0200 Subject: drm/xe: Update MEMIRQ to use tile-based printk macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have tile-based printk macros, so there is no need to manually prepare MEMIRQ-specific messages to include the tile id. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20251005133641.2651-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_memirq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index 49c45ec3e83c..0affede05820 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -14,16 +14,15 @@ #include "xe_device.h" #include "xe_device_types.h" #include "xe_gt.h" -#include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_hw_engine.h" #include "xe_map.h" #include "xe_memirq.h" +#include "xe_tile_printk.h" #define memirq_assert(m, condition) xe_tile_assert(memirq_to_tile(m), condition) #define memirq_printk(m, _level, _fmt, ...) \ - drm_##_level(&memirq_to_xe(m)->drm, "MEMIRQ%u: " _fmt, \ - memirq_to_tile(m)->id, ##__VA_ARGS__) + xe_tile_##_level(memirq_to_tile(m), "MEMIRQ: " _fmt, ##__VA_ARGS__) #ifdef CONFIG_DRM_XE_DEBUG_MEMIRQ #define memirq_debug(m, _fmt, ...) memirq_printk(m, dbg, _fmt, ##__VA_ARGS__) -- cgit v1.2.3 From 0faa22e70643211213ba13e7317600da9e96e44b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 5 Oct 2025 19:39:46 +0200 Subject: drm/xe/guc: Ratelimit diagnostic messages from the relay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There might be some malicious VFs that, by sending invalid VF2PF relay messages, could flood the PF's dmesg with our diagnostic messages. Rate limit all relay messages, unless running in DEBUG_SRIOV mode.
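For reference, the throttling relies on the generic ratelimit API from <linux/ratelimit.h>; a minimal, self-contained sketch of that API (hypothetical names; interval/burst values mirroring the patch):

    #include <linux/printk.h>
    #include <linux/ratelimit.h>

    /* Allow bursts of up to 10 messages, then mute for the 10 s window. */
    static DEFINE_RATELIMIT_STATE(diag_rs, 10 * HZ, 10);

    static void diag_notice(const char *what)
    {
            /* ___ratelimit() returns true while within budget and counts
             * the calls it suppresses. */
            if (___ratelimit(&diag_rs, "relay_sketch"))
                    pr_notice("relay: %s\n", what);
    }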
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20251005173946.2784-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_relay.c | 17 +++++++++++++++-- drivers/gpu/drm/xe/xe_guc_relay_types.h | 4 ++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c index e5dc94f3e618..0c0ff24ba62a 100644 --- a/drivers/gpu/drm/xe/xe_guc_relay.c +++ b/drivers/gpu/drm/xe/xe_guc_relay.c @@ -56,9 +56,19 @@ static struct xe_device *relay_to_xe(struct xe_guc_relay *relay) return gt_to_xe(relay_to_gt(relay)); } +#define XE_RELAY_DIAG_RATELIMIT_INTERVAL (10 * HZ) +#define XE_RELAY_DIAG_RATELIMIT_BURST 10 + +#define relay_ratelimit_printk(relay, _level, fmt...) ({ \ + typeof(relay) _r = (relay); \ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) || \ + ___ratelimit(&_r->diag_ratelimit, "xe_guc_relay")) \ + xe_gt_sriov_##_level(relay_to_gt(_r), "relay: " fmt); \ +}) + #define relay_assert(relay, condition) xe_gt_assert(relay_to_gt(relay), condition) -#define relay_notice(relay, msg...) xe_gt_sriov_notice(relay_to_gt(relay), "relay: " msg) -#define relay_debug(relay, msg...) xe_gt_sriov_dbg_verbose(relay_to_gt(relay), "relay: " msg) +#define relay_notice(relay, msg...) relay_ratelimit_printk((relay), notice, msg) +#define relay_debug(relay, msg...) relay_ratelimit_printk((relay), dbg_verbose, msg) static int relay_get_totalvfs(struct xe_guc_relay *relay) { @@ -345,6 +355,9 @@ int xe_guc_relay_init(struct xe_guc_relay *relay) INIT_WORK(&relay->worker, relays_worker_fn); INIT_LIST_HEAD(&relay->pending_relays); INIT_LIST_HEAD(&relay->incoming_actions); + ratelimit_state_init(&relay->diag_ratelimit, + XE_RELAY_DIAG_RATELIMIT_INTERVAL, + XE_RELAY_DIAG_RATELIMIT_BURST); err = mempool_init_kmalloc_pool(&relay->pool, XE_RELAY_MEMPOOL_MIN_NUM + relay_get_totalvfs(relay), diff --git a/drivers/gpu/drm/xe/xe_guc_relay_types.h b/drivers/gpu/drm/xe/xe_guc_relay_types.h index 5999fcb77e96..20eee10856b2 100644 --- a/drivers/gpu/drm/xe/xe_guc_relay_types.h +++ b/drivers/gpu/drm/xe/xe_guc_relay_types.h @@ -7,6 +7,7 @@ #define _XE_GUC_RELAY_TYPES_H_ #include +#include #include #include @@ -31,6 +32,9 @@ struct xe_guc_relay { /** @last_rid: last Relay-ID used while sending a message. */ u32 last_rid; + + /** @diag_ratelimit: ratelimit state used to throttle diagnostics messages. */ + struct ratelimit_state diag_ratelimit; }; #endif -- cgit v1.2.3 From 15623c860c93aac71d22e7bedb7661ff2d3418de Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Mon, 29 Sep 2025 11:02:05 +0200 Subject: nsfs: handle inode number mismatches gracefully in file handles Replace VFS_WARN_ON_ONCE() with graceful error handling when file handles contain inode numbers that don't match the actual namespace inode. This prevents userspace from triggering kernel warnings by providing malformed file handles to open_by_handle_at(). The issue occurs when userspace provides a file handle with valid namespace type and ID that successfully locates a namespace, but specifies an incorrect inode number. Previously, this would trigger VFS_WARN_ON_ONCE() when comparing the real inode number against the provided value. Since file handle data is user-controllable, inode number mismatches should be treated as invalid input rather than kernel consistency errors. Handle this case by returning NULL to indicate the file handle is invalid, rather than warning about what is essentially user input validation. 
Reported-by: syzbot+9eefe09bedd093f156c2@syzkaller.appspotmail.com Suggested-by: Jan Kara Reviewed-by: Jan Kara Signed-off-by: Deepanshu Kartikey Signed-off-by: Christian Brauner --- fs/nsfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nsfs.c b/fs/nsfs.c index 648dc59bef7f..79b026a36fb6 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -490,7 +490,9 @@ static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh, VFS_WARN_ON_ONCE(ns->ns_id != fid->ns_id); VFS_WARN_ON_ONCE(ns->ns_type != fid->ns_type); - VFS_WARN_ON_ONCE(ns->inum != fid->ns_inum); + + if (ns->inum != fid->ns_inum) + return NULL; if (!__ns_ref_get(ns)) return NULL; -- cgit v1.2.3 From deafd21efdd106f9744e2339e0c70c0f4ba565c3 Mon Sep 17 00:00:00 2001 From: Zhou Yuhang Date: Wed, 24 Sep 2025 20:21:39 +0800 Subject: fs: update comment in init_file() The f_count member in struct file has been replaced by f_ref, so update f_count to f_ref in the comment. Signed-off-by: Zhou Yuhang Signed-off-by: Christian Brauner --- fs/file_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/file_table.c b/fs/file_table.c index b223d873e48b..cd4a3db4659a 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -192,7 +192,7 @@ static int init_file(struct file *f, int flags, const struct cred *cred) f->f_sb_err = 0; /* - * We're SLAB_TYPESAFE_BY_RCU so initialize f_count last. While + * We're SLAB_TYPESAFE_BY_RCU so initialize f_ref last. While * fget-rcu pattern users need to be able to handle spurious * refcount bumps we should reinitialize the reused file first. */ -- cgit v1.2.3 From d68a29a6a229f8b4f3b19dbcd0bb02881316d642 Mon Sep 17 00:00:00 2001 From: Tong Li Date: Tue, 30 Sep 2025 19:02:58 +0800 Subject: rust: file: add intra-doc link for 'EBADF' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `BadFdError` doc comment mentions the `EBADF` constant but does not currently provide a navigation target for readers of the generated docs. Turning the references into intra-doc links matches the rest of the module and makes the documentation easier to explore. Suggested-by: Onur Özkan Link: https://github.com/Rust-for-Linux/linux/issues/1186 Signed-off-by: Tong Li Reviewed-by: Onur Özkan Signed-off-by: Christian Brauner --- rust/kernel/fs/file.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index cf06e73a6da0..cd6987850332 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -448,9 +448,9 @@ impl Drop for FileDescriptorReservation { } } -/// Represents the `EBADF` error code. +/// Represents the [`EBADF`] error code. /// -/// Used for methods that can only fail with `EBADF`. +/// Used for methods that can only fail with [`EBADF`]. #[derive(Copy, Clone, Eq, PartialEq)] pub struct BadFdError; -- cgit v1.2.3 From 154d1e7ad9e5ce4b2aaefd3862b3dba545ad978d Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Tue, 30 Sep 2025 13:42:57 +0800 Subject: dax: skip read lock assertion for read-only filesystems Commit 168316db3583 ("dax: assert that i_rwsem is held exclusive for writes") added lock assertions to ensure proper locking in DAX operations. However, these assertions trigger false-positive lockdep warnings since the read lock is unnecessary on read-only filesystems (e.g., erofs). This patch skips the read lock assertion for read-only filesystems, eliminating the spurious warnings while maintaining the integrity checks for writable filesystems.
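For context, lockdep_assert_held_write() demands exclusive ownership of the rwsem while lockdep_assert_held() accepts either shared or exclusive; the fix simply gates the reader-side assertion on the superblock being writable. A condensed sketch of the resulting rule (hypothetical helper, not the patched function):

    #include <linux/fs.h>
    #include <linux/lockdep.h>

    static void assert_dax_io_locked(struct inode *inode, bool is_write)
    {
            if (is_write)                           /* writers: exclusive */
                    lockdep_assert_held_write(&inode->i_rwsem);
            else if (!sb_rdonly(inode->i_sb))       /* readers: shared */
                    lockdep_assert_held(&inode->i_rwsem);
            /* read-only mounts: readers take no i_rwsem, so assert nothing */
    }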
Fixes: 168316db3583 ("dax: assert that i_rwsem is held exclusive for writes") Signed-off-by: Yuezhang Mo Reviewed-by: Friendy Su Reviewed-by: Daniel Palmer Reviewed-by: Gao Xiang Signed-off-by: Christian Brauner --- fs/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index 89f071ba7b10..516f995a988c 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1725,7 +1725,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == WRITE) { lockdep_assert_held_write(&iomi.inode->i_rwsem); iomi.flags |= IOMAP_WRITE; - } else { + } else if (!sb_rdonly(iomi.inode->i_sb)) { lockdep_assert_held(&iomi.inode->i_rwsem); } -- cgit v1.2.3 From 4592e7abd2cd2900d312a7438dcaa56c13728314 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sat, 4 Oct 2025 18:20:34 +0200 Subject: drm/xe/pf: Improve reading VF config blob from debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to the use of the file operation flows, we might encode the VF config blob multiple times. Avoid that by capturing it once during the open() operation instead of the read() operation. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20251004162036.1800-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 44 +++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 44fec2aae536..c026a3910e7e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -436,15 +436,19 @@ static const struct file_operations guc_state_ops = { * : ├── config_blob */ -static ssize_t config_blob_read(struct file *file, char __user *buf, - size_t count, loff_t *pos) +struct config_blob_data { + size_t size; + u8 blob[]; +}; + +static int config_blob_open(struct inode *inode, struct file *file) { struct dentry *dent = file_dentry(file); struct dentry *parent = dent->d_parent; struct xe_gt *gt = extract_gt(parent); unsigned int vfid = extract_vfid(parent); + struct config_blob_data *cbd; ssize_t ret; - void *tmp; ret = xe_gt_sriov_pf_config_save(gt, vfid, NULL, 0); if (!ret) @@ -452,16 +456,27 @@ static ssize_t config_blob_read(struct file *file, char __user *buf, if (ret < 0) return ret; - tmp = kzalloc(ret, GFP_KERNEL); - if (!tmp) + cbd = kzalloc(struct_size(cbd, blob, ret), GFP_KERNEL); + if (!cbd) return -ENOMEM; - ret = xe_gt_sriov_pf_config_save(gt, vfid, tmp, ret); - if (ret > 0) - ret = simple_read_from_buffer(buf, count, pos, tmp, ret); + ret = xe_gt_sriov_pf_config_save(gt, vfid, cbd->blob, ret); + if (ret < 0) { + kfree(cbd); + return ret; + } - kfree(tmp); - return ret; + cbd->size = ret; + file->private_data = cbd; + return nonseekable_open(inode, file); +} + +static ssize_t config_blob_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + struct config_blob_data *cbd = file->private_data; + + return simple_read_from_buffer(buf, count, pos, cbd->blob, cbd->size); } static ssize_t config_blob_write(struct file *file, const char __user *buf, @@ -498,11 +513,18 @@ static ssize_t config_blob_write(struct file *file, const char __user *buf, return ret; } +static int config_blob_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + static const struct file_operations config_blob_ops = { .owner = THIS_MODULE, + .open = config_blob_open, .read = 
config_blob_read, .write = config_blob_write, - .llseek = default_llseek, + .release = config_blob_release, }; static void pf_add_compat_attrs(struct xe_gt *gt, struct dentry *dent, unsigned int vfid) -- cgit v1.2.3 From 56094ad3eaa21e6621396cc33811d8f72847a834 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Oct 2025 17:55:07 +0200 Subject: vfs: Don't leak disconnected dentries on umount When a user calls open_by_handle_at() on some inode that is not cached, we will create a disconnected dentry for it. If such a dentry is a directory, exportfs_decode_fh_raw() will then try to connect this dentry to the dentry tree through reconnect_path(). It may happen for various reasons (such as a corrupted fs or a race with rename) that the call to lookup_one_unlocked() in reconnect_one() will fail to find the dentry we are trying to reconnect and instead create a new dentry under the parent. Now this dentry will not be marked as disconnected although the parent still may well be disconnected (at least in case this inconsistency happened because the fs is corrupted and .. doesn't point to the real parent directory). This creates inconsistency in disconnected flags but AFAICS it was mostly harmless. At least until commit f1ee616214cb ("VFS: don't keep disconnected dentries on d_anon") which removed adding of most disconnected dentries to sb->s_anon list. Thus after this commit cleanup of disconnected dentries implicitly relies on the fact that dput() will immediately reclaim such dentries. However when some leaf dentry isn't marked as disconnected, as in the scenario described above, the reclaim doesn't happen and the dentries are "leaked". Memory reclaim can eventually reclaim them but otherwise they stay in memory and if umount comes first, we hit the infamous "Busy inodes after unmount" bug. Make sure all dentries created under a disconnected parent are marked as disconnected as well. Reported-by: syzbot+1d79ebe5383fc016cf07@syzkaller.appspotmail.com Fixes: f1ee616214cb ("VFS: don't keep disconnected dentries on d_anon") CC: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Christian Brauner --- fs/dcache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/dcache.c b/fs/dcache.c index a067fa0a965a..035cccbc9276 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2557,6 +2557,8 @@ struct dentry *d_alloc_parallel(struct dentry *parent, spin_lock(&parent->d_lock); new->d_parent = dget_dlock(parent); hlist_add_head(&new->d_sib, &parent->d_children); + if (parent->d_flags & DCACHE_DISCONNECTED) + new->d_flags |= DCACHE_DISCONNECTED; spin_unlock(&parent->d_lock); retry: -- cgit v1.2.3 From a779e27f24aeb679969ddd1fdd7f636e22ddbc1e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 7 Oct 2025 11:32:42 +0200 Subject: coredump: fix core_pattern input validation In be1e0283021e ("coredump: don't pointlessly check and spew warnings") we tried to fix input validation so it only happens during a write to core_pattern. This would avoid needlessly logging a lot of warnings during a read operation. However the logic accidentally got inverted in this commit. Fix it so the input validation only happens on write and is skipped on read.
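The pattern at issue, for reference: a sysctl proc handler is called for both reads and writes, and only the write path should run the extra validation. A minimal sketch with the test the right way around (hypothetical names; validate_pattern() stands in for the real check):

    #include <linux/sysctl.h>

    static int validate_pattern(const char *s) { return 0; }  /* hypothetical */

    static int proc_dostring_checked(const struct ctl_table *table, int write,
                                     void *buffer, size_t *lenp, loff_t *ppos)
    {
            int ret;

            if (!write)     /* reads pass straight through, unvalidated */
                    return proc_dostring(table, write, buffer, lenp, ppos);

            ret = proc_dostring(table, write, buffer, lenp, ppos);
            if (!ret)
                    ret = validate_pattern(table->data);
            return ret;
    }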
Fixes: be1e0283021e ("coredump: don't pointlessly check and spew warnings") Fixes: 16195d2c7dd2 ("coredump: validate socket name as it is written") Reviewed-by: Jan Kara Reported-by: Yu Watanabe Signed-off-by: Christian Brauner --- fs/coredump.c | 2 +- fs/exec.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index b5fc06a092a4..5c1c381ee380 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1468,7 +1468,7 @@ static int proc_dostring_coredump(const struct ctl_table *table, int write, ssize_t retval; char old_core_pattern[CORENAME_MAX_SIZE]; - if (write) + if (!write) return proc_dostring(table, write, buffer, lenp, ppos); retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE); diff --git a/fs/exec.c b/fs/exec.c index 6b70c6726d31..4298e7e08d5d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2048,7 +2048,7 @@ static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int writ { int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (!error && !write) + if (!error && write) validate_coredump_safety(); return error; } -- cgit v1.2.3 From e2c69490dda5d4c9f1bfbb2898989c8f3530e354 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 6 Oct 2025 13:18:57 -0700 Subject: ipmi: Fix handling of messages with provided receive message pointer Prior to commit b52da4054ee0 ("ipmi: Rework user message limit handling"), i_ipmi_request() used to increase the user reference counter if the receive message is provided by the caller of IPMI API functions. This is no longer the case. However, ipmi_free_recv_msg() is still called and decreases the reference counter. This results in the reference counter reaching zero, the user data pointer is released, and all kinds of interesting crashes are seen. Fix the problem by increasing user reference counter if the receive message has been provided by the caller. Fixes: b52da4054ee0 ("ipmi: Rework user message limit handling") Reported-by: Eric Dumazet Cc: Eric Dumazet Cc: Greg Thelen Signed-off-by: Guenter Roeck Message-ID: <20251006201857.3433837-1-linux@roeck-us.net> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index a0b67a35a5f0..3700ab4eba3e 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2301,8 +2301,11 @@ static int i_ipmi_request(struct ipmi_user *user, if (supplied_recv) { recv_msg = supplied_recv; recv_msg->user = user; - if (user) + if (user) { atomic_inc(&user->nr_msgs); + /* The put happens when the message is freed. */ + kref_get(&user->refcount); + } } else { recv_msg = ipmi_alloc_recv_msg(user); if (IS_ERR(recv_msg)) -- cgit v1.2.3 From 1bcc3f87912779f66cdf1789a066046536ca6ccc Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Wed, 24 Sep 2025 10:42:55 -0700 Subject: KVM: selftests: Test prefault memory during concurrent memslot removal Expand the prefault memory selftest to add a regression test for a KVM bug where KVM's retry logic would result in (breakable) deadlock due to the memslot deletion waiting on prefaulting to release SRCU, and prefaulting waiting on the memslot to fully disappear (KVM uses a two-step process to delete memslots, and KVM x86 retries page faults if a to-be-deleted, a.k.a. INVALID, memslot is encountered). 
To exercise concurrent memslot remove, spawn a second thread to initiate memslot removal at roughly the same time as prefaulting. Test memslot removal for all testcases, i.e. don't limit concurrent removal to only the success case. There are essentially three prefault scenarios (so far) that are of interest: 1. Success 2. ENOENT due to no memslot 3. EAGAIN due to INVALID memslot For all intents and purposes, #1 and #2 are mutually exclusive, or rather, easier to test via separate testcases since writing to non-existent memory is trivial. But for #3, making it mutually exclusive with #1 _or_ #2 is actually more complex than testing memslot removal for all scenarios. The only requirement to let memslot removal coexist with other scenarios is a way to guarantee a stable result, e.g. that the "no memslot" test observes ENOENT, not EAGAIN, for the final checks. So, rather than make memslot removal mutually exclusive with the ENOENT scenario, simply restore the memslot and retry prefaulting. For the "no memslot" case, KVM_PRE_FAULT_MEMORY should be idempotent, i.e. should always fail with ENOENT regardless of how many times userspace attempts prefaulting. Pass in both the base GPA and the offset (instead of the "full" GPA) so that the worker can recreate the memslot. Signed-off-by: Yan Zhao Co-developed-by: Sean Christopherson Link: https://lore.kernel.org/r/20250924174255.2141847-1-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/pre_fault_memory_test.c | 131 ++++++++++++++++++--- 1 file changed, 114 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c index 0350a8896a2f..f04768c1d2e4 100644 --- a/tools/testing/selftests/kvm/pre_fault_memory_test.c +++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c @@ -10,6 +10,7 @@ #include #include #include +#include /* Arbitrarily chosen values */ #define TEST_SIZE (SZ_2M + PAGE_SIZE) @@ -30,18 +31,66 @@ static void guest_code(uint64_t base_gpa) GUEST_DONE(); } -static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size, - u64 left) +struct slot_worker_data { + struct kvm_vm *vm; + u64 gpa; + uint32_t flags; + bool worker_ready; + bool prefault_ready; + bool recreate_slot; +}; + +static void *delete_slot_worker(void *__data) +{ + struct slot_worker_data *data = __data; + struct kvm_vm *vm = data->vm; + + WRITE_ONCE(data->worker_ready, true); + + while (!READ_ONCE(data->prefault_ready)) + cpu_relax(); + + vm_mem_region_delete(vm, TEST_SLOT); + + while (!READ_ONCE(data->recreate_slot)) + cpu_relax(); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa, + TEST_SLOT, TEST_NPAGES, data->flags); + + return NULL; +} + +static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset, + u64 size, u64 expected_left, bool private) { struct kvm_pre_fault_memory range = { - .gpa = gpa, + .gpa = base_gpa + offset, .size = size, .flags = 0, }; - u64 prev; + struct slot_worker_data data = { + .vm = vcpu->vm, + .gpa = base_gpa, + .flags = private ? KVM_MEM_GUEST_MEMFD : 0, + }; + bool slot_recreated = false; + pthread_t slot_worker; int ret, save_errno; + u64 prev; + + /* + * Concurrently delete (and recreate) the slot to test KVM's handling + * of a racing memslot deletion with prefaulting. 
+ */ + pthread_create(&slot_worker, NULL, delete_slot_worker, &data); - do { + while (!READ_ONCE(data.worker_ready)) + cpu_relax(); + + WRITE_ONCE(data.prefault_ready, true); + + for (;;) { prev = range.size; ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range); save_errno = errno; @@ -49,18 +98,65 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size, "%sexpecting range.size to change on %s", ret < 0 ? "not " : "", ret < 0 ? "failure" : "success"); - } while (ret >= 0 ? range.size : save_errno == EINTR); - TEST_ASSERT(range.size == left, - "Completed with %lld bytes left, expected %" PRId64, - range.size, left); + /* + * Immediately retry prefaulting if KVM was interrupted by an + * unrelated signal/event. + */ + if (ret < 0 && save_errno == EINTR) + continue; + + /* + * Tell the worker to recreate the slot in order to complete + * prefaulting (if prefault didn't already succeed before the + * slot was deleted) and/or to prepare for the next testcase. + * Wait for the worker to exit so that the next invocation of + * prefaulting is guaranteed to complete (assuming no KVM bugs). + */ + if (!slot_recreated) { + WRITE_ONCE(data.recreate_slot, true); + pthread_join(slot_worker, NULL); + slot_recreated = true; + + /* + * Retry prefaulting to get a stable result, i.e. to + * avoid seeing random EAGAIN failures. Don't retry if + * prefaulting already succeeded, as KVM disallows + * prefaulting with size=0, i.e. blindly retrying would + * result in test failures due to EINVAL. KVM should + * always return success if all bytes are prefaulted, + * i.e. there is no need to guard against EAGAIN being + * returned. + */ + if (range.size) + continue; + } + + /* + * All done if there are no remaining bytes to prefault, or if + * prefaulting failed (EINTR was handled above, and EAGAIN due + * to prefaulting a memslot that's being actively deleted should + * be impossible since the memslot has already been recreated). + */ + if (!range.size || ret < 0) + break; + } - if (left == 0) - __TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm); + TEST_ASSERT(range.size == expected_left, + "Completed with %llu bytes left, expected %lu", + range.size, expected_left); + + /* + * Assert success if prefaulting the entire range should succeed, i.e. + * complete with no bytes remaining. Otherwise prefaulting should have + * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when + * no memslot exists). + */ + if (!expected_left) + TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); else - /* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. 
*/ - __TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, - "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm); + TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, + KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); } static void __test_pre_fault_memory(unsigned long vm_type, bool private) @@ -97,9 +193,10 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private) if (private) vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE); - pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0); - pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE); - pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE); + + pre_fault_memory(vcpu, guest_test_phys_mem, 0, SZ_2M, 0, private); + pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private); + pre_fault_memory(vcpu, guest_test_phys_mem, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private); vcpu_args_set(vcpu, 1, guest_test_virt_mem); vcpu_run(vcpu); -- cgit v1.2.3 From c09a9933af08cdcb295ccc65a6d9862bf8474706 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 3 Oct 2025 01:26:48 +0200 Subject: drm/xe/pf: Add max_vfs configfs attribute to control PF mode In addition to existing max_vfs modparam, add max_vfs configfs attribute to allow PF configuration on the per-device level. Default config value is still based on the modparam value. Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Acked-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251002232648.203370-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_configfs.c | 139 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_configfs.h | 4 ++ drivers/gpu/drm/xe/xe_sriov_pf.c | 3 + 3 files changed, 146 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index 139663423185..464a79c2a903 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -18,6 +18,7 @@ #include "xe_hw_engine_types.h" #include "xe_module.h" #include "xe_pci_types.h" +#include "xe_sriov_types.h" /** * DOC: Xe Configfs @@ -169,6 +170,32 @@ * Currently this is implemented only for post and mid context restore and * these attributes can only be set before binding to the device. * + * Max SR-IOV Virtual Functions + * ---------------------------- + * + * This config allows to limit number of the Virtual Functions (VFs) that can + * be managed by the Physical Function (PF) driver, where value 0 disables the + * PF mode (no VFs). + * + * The default max_vfs config value is taken from the max_vfs modparam. + * + * How to enable PF with support with unlimited (up to HW limit) number of VFs:: + * + * # echo unlimited > /sys/kernel/config/xe/0000:00:02.0/sriov/max_vfs + * # echo 0000:00:02.0 > /sys/bus/pci/drivers/xe/bind + * + * How to enable PF with support up to 3 VFs:: + * + * # echo 3 > /sys/kernel/config/xe/0000:00:02.0/sriov/max_vfs + * # echo 0000:00:02.0 > /sys/bus/pci/drivers/xe/bind + * + * How to disable PF mode and always run as native:: + * + * # echo 0 > /sys/kernel/config/xe/0000:00:02.0/sriov/max_vfs + * # echo 0000:00:02.0 > /sys/bus/pci/drivers/xe/bind + * + * This setting only takes effect when probing the device. 
+ * * Remove devices * ============== * @@ -185,6 +212,7 @@ struct wa_bb { struct xe_config_group_device { struct config_group group; + struct config_group sriov; struct xe_config_device { u64 engines_allowed; @@ -192,23 +220,34 @@ struct xe_config_group_device { struct wa_bb ctx_restore_mid_bb[XE_ENGINE_CLASS_MAX]; bool survivability_mode; bool enable_psmi; + struct { + unsigned int max_vfs; + } sriov; } config; /* protects attributes */ struct mutex lock; /* matching descriptor */ const struct xe_device_desc *desc; + /* tentative SR-IOV mode */ + enum xe_sriov_mode mode; }; static const struct xe_config_device device_defaults = { .engines_allowed = U64_MAX, .survivability_mode = false, .enable_psmi = false, + .sriov = { + .max_vfs = UINT_MAX, + }, }; static void set_device_defaults(struct xe_config_device *config) { *config = device_defaults; +#ifdef CONFIG_PCI_IOV + config->sriov.max_vfs = xe_modparam.max_vfs; +#endif } struct engine_info { @@ -721,6 +760,68 @@ static const struct config_item_type xe_config_device_type = { .ct_owner = THIS_MODULE, }; +static ssize_t sriov_max_vfs_show(struct config_item *item, char *page) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item->ci_parent); + + guard(mutex)(&dev->lock); + + if (dev->config.sriov.max_vfs == UINT_MAX) + return sprintf(page, "%s\n", "unlimited"); + else + return sprintf(page, "%u\n", dev->config.sriov.max_vfs); +} + +static ssize_t sriov_max_vfs_store(struct config_item *item, const char *page, size_t len) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item->ci_parent); + unsigned int max_vfs; + int ret; + + guard(mutex)(&dev->lock); + + if (is_bound(dev)) + return -EBUSY; + + ret = kstrtouint(page, 0, &max_vfs); + if (ret) { + if (!sysfs_streq(page, "unlimited")) + return ret; + max_vfs = UINT_MAX; + } + + dev->config.sriov.max_vfs = max_vfs; + return len; +} + +CONFIGFS_ATTR(sriov_, max_vfs); + +static struct configfs_attribute *xe_config_sriov_attrs[] = { + &sriov_attr_max_vfs, + NULL, +}; + +static bool xe_config_sriov_is_visible(struct config_item *item, + struct configfs_attribute *attr, int n) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item->ci_parent); + + if (attr == &sriov_attr_max_vfs && dev->mode != XE_SRIOV_MODE_PF) + return false; + + return true; +} + +static struct configfs_group_operations xe_config_sriov_group_ops = { + .is_visible = xe_config_sriov_is_visible, +}; + +static const struct config_item_type xe_config_sriov_type = { + .ct_owner = THIS_MODULE, + .ct_group_ops = &xe_config_sriov_group_ops, + .ct_attrs = xe_config_sriov_attrs, +}; + static const struct xe_device_desc *xe_match_desc(struct pci_dev *pdev) { struct device_driver *driver = driver_find("xe", &pci_bus_type); @@ -746,6 +847,7 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro unsigned int domain, bus, slot, function; struct xe_config_group_device *dev; const struct xe_device_desc *match; + enum xe_sriov_mode mode; struct pci_dev *pdev; char canonical[16]; int vfnumber = 0; @@ -762,6 +864,9 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro return ERR_PTR(-EINVAL); pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function)); + mode = pdev ? dev_is_pf(&pdev->dev) ? 
+ XE_SRIOV_MODE_PF : XE_SRIOV_MODE_NONE : XE_SRIOV_MODE_VF; + if (!pdev && function) pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, 0)); if (!pdev && slot) @@ -796,9 +901,15 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro return ERR_PTR(-ENOMEM); dev->desc = match; + dev->mode = match->has_sriov ? mode : XE_SRIOV_MODE_NONE; + set_device_defaults(&dev->config); config_group_init_type_name(&dev->group, name, &xe_config_device_type); + if (dev->mode != XE_SRIOV_MODE_NONE) { + config_group_init_type_name(&dev->sriov, "sriov", &xe_config_sriov_type); + configfs_add_default_group(&dev->sriov, &dev->group); + } mutex_init(&dev->lock); @@ -988,6 +1099,34 @@ u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, return len; } +#ifdef CONFIG_PCI_IOV +/** + * xe_configfs_get_max_vfs() - Get number of VFs that could be managed + * @pdev: the &pci_dev device + * + * Find the configfs group that belongs to the PCI device and return maximum + * number of Virtual Functions (VFs) that could be managed by this device. + * If configfs group is not present, use value of max_vfs module parameter. + * + * Return: maximum number of VFs that could be managed. + */ +unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + unsigned int max_vfs; + + if (!dev) + return xe_modparam.max_vfs; + + scoped_guard(mutex, &dev->lock) + max_vfs = dev->config.sriov.max_vfs; + + config_group_put(&dev->group); + + return max_vfs; +} +#endif + int __init xe_configfs_init(void) { int ret; diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h index c61e0e47ed94..16a1f578e4fe 100644 --- a/drivers/gpu/drm/xe/xe_configfs.h +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -23,6 +23,9 @@ u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_clas const u32 **cs); u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, const u32 **cs); +#ifdef CONFIG_PCI_IOV +unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev); +#endif #else static inline int xe_configfs_init(void) { return 0; } static inline void xe_configfs_exit(void) { } @@ -34,6 +37,7 @@ static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum const u32 **cs) { return 0; } static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, const u32 **cs) { return 0; } +static inline unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev) { return UINT_MAX; } #endif #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 2814f29f1093..bc1ab9ee31d9 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -8,6 +8,7 @@ #include #include "xe_assert.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_gt_sriov_pf.h" #include "xe_module.h" @@ -19,6 +20,8 @@ static unsigned int wanted_max_vfs(struct xe_device *xe) { + if (IS_ENABLED(CONFIG_CONFIGFS_FS)) + return xe_configfs_get_max_vfs(to_pci_dev(xe->drm.dev)); return xe_modparam.max_vfs; } -- cgit v1.2.3 From 2a27f6a8fb5722223d526843040f747e9b0e8060 Mon Sep 17 00:00:00 2001 From: Celeste Liu Date: Tue, 30 Sep 2025 19:34:28 +0800 Subject: can: gs_usb: increase max interface to U8_MAX This issue was found by Runcheng Lu when developing the HSCanT USB to CAN FD converter[1]. The original developers may have had only a device with 3 interfaces to test, so they wrote 3 here and left it to be changed in the future.
During the HSCanT development, we actually used 4 interfaces, so the limit of 3 is no longer enough. But just increasing it by one is not future-proof. Since the channel index type in gs_host_frame is u8, just make canch[] a flexible array with a u8 count, so it is naturally constrained by U8_MAX and we avoid statically allocating 256 pointers for every gs_usb device. [1]: https://github.com/cherry-embedded/HSCanT-hardware Fixes: d08e973a77d1 ("can: gs_usb: Added support for the GS_USB CAN devices") Reported-by: Runcheng Lu Cc: stable@vger.kernel.org Reviewed-by: Vincent Mailhol Signed-off-by: Celeste Liu Link: https://patch.msgid.link/20250930-gs-usb-max-if-v5-1-863330bf6666@coelacanthus.name Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index c9482d6e947b..9fb4cbbd6d6d 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -289,11 +289,6 @@ struct gs_host_frame { #define GS_MAX_RX_URBS 30 #define GS_NAPI_WEIGHT 32 -/* Maximum number of interfaces the driver supports per device. - * Current hardware only supports 3 interfaces. The future may vary. - */ -#define GS_MAX_INTF 3 - struct gs_tx_context { struct gs_can *dev; unsigned int echo_id; @@ -324,7 +319,6 @@ struct gs_can { /* usb interface struct */ struct gs_usb { - struct gs_can *canch[GS_MAX_INTF]; struct usb_anchor rx_submitted; struct usb_device *udev; @@ -336,9 +330,11 @@ struct gs_usb { unsigned int hf_size_rx; u8 active_channels; + u8 channel_cnt; unsigned int pipe_in; unsigned int pipe_out; + struct gs_can *canch[] __counted_by(channel_cnt); }; /* 'allocate' a tx context. @@ -599,7 +595,7 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) } /* device reports out of range channel id */ - if (hf->channel >= GS_MAX_INTF) + if (hf->channel >= parent->channel_cnt) goto device_detach; dev = parent->canch[hf->channel]; @@ -699,7 +695,7 @@ resubmit_urb: /* USB failure take down all interfaces */ if (rc == -ENODEV) { device_detach: - for (rc = 0; rc < GS_MAX_INTF; rc++) { + for (rc = 0; rc < parent->channel_cnt; rc++) { if (parent->canch[rc]) netif_device_detach(parent->canch[rc]->netdev); } @@ -1460,17 +1456,19 @@ static int gs_usb_probe(struct usb_interface *intf, icount = dconf.icount + 1; dev_info(&intf->dev, "Configuring for %u interfaces\n", icount); - if (icount > GS_MAX_INTF) { + if (icount > type_max(parent->channel_cnt)) { dev_err(&intf->dev, "Driver cannot handle more that %u CAN interfaces\n", - GS_MAX_INTF); + type_max(parent->channel_cnt)); return -EINVAL; } - parent = kzalloc(sizeof(*parent), GFP_KERNEL); + parent = kzalloc(struct_size(parent, canch, icount), GFP_KERNEL); if (!parent) return -ENOMEM; + parent->channel_cnt = icount; + init_usb_anchor(&parent->rx_submitted); usb_set_intfdata(intf, parent); @@ -1531,7 +1529,7 @@ static void gs_usb_disconnect(struct usb_interface *intf) return; } - for (i = 0; i < GS_MAX_INTF; i++) + for (i = 0; i < parent->channel_cnt; i++) if (parent->canch[i]) gs_destroy_candev(parent->canch[i]); -- cgit v1.2.3 From a12f0bc764da3781da2019c60826f47a6d7ed64f Mon Sep 17 00:00:00 2001 From: Celeste Liu Date: Tue, 30 Sep 2025 14:53:39 +0800 Subject: can: gs_usb: gs_make_candev(): populate net_device->dev_port The gs_usb driver supports USB devices with more than 1 CAN channel.
In old kernels before 3.15, it used net_device->dev_id to distinguish the different channels in userspace, which was done in commit acff76fa45b4 ("can: gs_usb: gs_make_candev(): set netdev->dev_id"). But since 3.15, the correct way is to populate net_device->dev_port. And according to the documentation, if a network device supports multiple interfaces, the lack of net_device->dev_port SHALL be treated as a bug. Fixes: acff76fa45b4 ("can: gs_usb: gs_make_candev(): set netdev->dev_id") Cc: stable@vger.kernel.org Signed-off-by: Celeste Liu Link: https://patch.msgid.link/20250930-gs-usb-populate-net_device-dev_port-v1-1-68a065de6937@coelacanthus.name Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 9fb4cbbd6d6d..69b8d6da651b 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -1245,6 +1245,7 @@ static struct gs_can *gs_make_candev(unsigned int channel, netdev->flags |= IFF_ECHO; /* we support full roundtrip echo */ netdev->dev_id = channel; + netdev->dev_port = channel; /* dev setup */ strcpy(dev->bt_const.name, KBUILD_MODNAME); -- cgit v1.2.3 From ba569fb07a7e9e9b71e9282e27e993ba859295c2 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 6 Aug 2025 17:46:32 +0200 Subject: can: m_can: m_can_plat_remove(): add missing pm_runtime_disable() Commit 227619c3ff7c ("can: m_can: move runtime PM enable/disable to m_can_platform") moved the PM runtime enable from the m_can core driver into the m_can_platform. That patch forgot to move the pm_runtime_disable() to m_can_plat_remove(), so that unloading the m_can_platform driver causes an "Unbalanced pm_runtime_enable!" error message. Add the missing pm_runtime_disable() to m_can_plat_remove() to fix the problem. Cc: Patrik Flykt Fixes: 227619c3ff7c ("can: m_can: move runtime PM enable/disable to m_can_platform") Reviewed-by: Markus Schneider-Pargmann Link: https://patch.msgid.link/20250929-m_can-fix-state-handling-v4-1-682b49b49d9a@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c index b832566efda0..057eaa7b8b4b 100644 --- a/drivers/net/can/m_can/m_can_platform.c +++ b/drivers/net/can/m_can/m_can_platform.c @@ -180,7 +180,7 @@ static void m_can_plat_remove(struct platform_device *pdev) struct m_can_classdev *mcan_class = &priv->cdev; m_can_class_unregister(mcan_class); - + pm_runtime_disable(mcan_class->dev); m_can_class_free_dev(mcan_class->net); } -- cgit v1.2.3 From 3d9db29b45f970d81acf61cf91a65442efbeb997 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 6 Aug 2025 16:56:15 +0200 Subject: can: m_can: m_can_handle_state_errors(): fix CAN state transition to Error Active The CAN Error State is determined by the receive and transmit error counters. The CAN error counters decrease when reception/transmission is successful, so that a state transition back to the Error Active state is possible. This transition is not handled by m_can_handle_state_errors(). Add the missing detection of the Error Active state to m_can_handle_state_errors() and extend the handling of this state in m_can_handle_state_change().
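For background, classic CAN fault confinement (ISO 11898-1) derives the node state from the two error counters, which is why successful transfers can move a node back towards Error Active; M_CAN reports the resulting state in the PSR register, so the driver decodes that rather than tracking the counters itself. A sketch of the state/counter relationship (thresholds per the CAN specification; not driver code):

    #include <linux/can/dev.h>

    static enum can_state state_from_counters(u16 txerr, u16 rxerr)
    {
            if (txerr > 255)                        /* TEC overflow */
                    return CAN_STATE_BUS_OFF;
            if (txerr > 127 || rxerr > 127)
                    return CAN_STATE_ERROR_PASSIVE;
            if (txerr >= 96 || rxerr >= 96)
                    return CAN_STATE_ERROR_WARNING;
            return CAN_STATE_ERROR_ACTIVE;
    }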
Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Fixes: cd0d83eab2e0 ("can: m_can: m_can_handle_state_change(): fix state change") Reviewed-by: Markus Schneider-Pargmann Link: https://patch.msgid.link/20250929-m_can-fix-state-handling-v4-2-682b49b49d9a@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 53 ++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index e1d725979685..ac864183a536 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -812,6 +812,9 @@ static int m_can_handle_state_change(struct net_device *dev, u32 timestamp = 0; switch (new_state) { + case CAN_STATE_ERROR_ACTIVE: + cdev->can.state = CAN_STATE_ERROR_ACTIVE; + break; case CAN_STATE_ERROR_WARNING: /* error warning state */ cdev->can.can_stats.error_warning++; @@ -841,6 +844,12 @@ static int m_can_handle_state_change(struct net_device *dev, __m_can_get_berr_counter(dev, &bec); switch (new_state) { + case CAN_STATE_ERROR_ACTIVE: + cf->can_id |= CAN_ERR_CRTL | CAN_ERR_CNT; + cf->data[1] = CAN_ERR_CRTL_ACTIVE; + cf->data[6] = bec.txerr; + cf->data[7] = bec.rxerr; + break; case CAN_STATE_ERROR_WARNING: /* error warning state */ cf->can_id |= CAN_ERR_CRTL | CAN_ERR_CNT; @@ -877,30 +886,33 @@ static int m_can_handle_state_change(struct net_device *dev, return 1; } -static int m_can_handle_state_errors(struct net_device *dev, u32 psr) +static enum can_state +m_can_state_get_by_psr(struct m_can_classdev *cdev) { - struct m_can_classdev *cdev = netdev_priv(dev); - int work_done = 0; + u32 reg_psr; - if (psr & PSR_EW && cdev->can.state != CAN_STATE_ERROR_WARNING) { - netdev_dbg(dev, "entered error warning state\n"); - work_done += m_can_handle_state_change(dev, - CAN_STATE_ERROR_WARNING); - } + reg_psr = m_can_read(cdev, M_CAN_PSR); - if (psr & PSR_EP && cdev->can.state != CAN_STATE_ERROR_PASSIVE) { - netdev_dbg(dev, "entered error passive state\n"); - work_done += m_can_handle_state_change(dev, - CAN_STATE_ERROR_PASSIVE); - } + if (reg_psr & PSR_BO) + return CAN_STATE_BUS_OFF; + if (reg_psr & PSR_EP) + return CAN_STATE_ERROR_PASSIVE; + if (reg_psr & PSR_EW) + return CAN_STATE_ERROR_WARNING; - if (psr & PSR_BO && cdev->can.state != CAN_STATE_BUS_OFF) { - netdev_dbg(dev, "entered error bus off state\n"); - work_done += m_can_handle_state_change(dev, - CAN_STATE_BUS_OFF); - } + return CAN_STATE_ERROR_ACTIVE; +} - return work_done; +static int m_can_handle_state_errors(struct net_device *dev) +{ + struct m_can_classdev *cdev = netdev_priv(dev); + enum can_state new_state; + + new_state = m_can_state_get_by_psr(cdev); + if (new_state == cdev->can.state) + return 0; + + return m_can_handle_state_change(dev, new_state); } static void m_can_handle_other_err(struct net_device *dev, u32 irqstatus) @@ -1031,8 +1043,7 @@ static int m_can_rx_handler(struct net_device *dev, int quota, u32 irqstatus) } if (irqstatus & IR_ERR_STATE) - work_done += m_can_handle_state_errors(dev, - m_can_read(cdev, M_CAN_PSR)); + work_done += m_can_handle_state_errors(dev); if (irqstatus & IR_ERR_BUS_30X) work_done += m_can_handle_bus_errors(dev, irqstatus, -- cgit v1.2.3 From 4942c42fe1849e6d68dfb5b36ccba344a9fac016 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 6 Aug 2025 18:24:12 +0200 Subject: can: m_can: m_can_chip_config(): bring up interface in correct state In some SoCs (observed on the STM32MP15) the M_CAN IP core keeps the CAN state and CAN 
error counters over an internal reset cycle. An external reset is not always possible due to the shared reset with the other CAN core. This caused the core to not always be in the Error Active state when bringing up the controller. Instead of always setting the CAN state to Error Active in m_can_chip_config(), fix this by reading and decoding the Protocol Status Register (PSR) and setting the CAN state accordingly. Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Reviewed-by: Markus Schneider-Pargmann Link: https://patch.msgid.link/20250929-m_can-fix-state-handling-v4-3-682b49b49d9a@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index ac864183a536..b6db5b57241c 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1617,7 +1617,7 @@ static int m_can_start(struct net_device *dev) netdev_queue_set_dql_min_limit(netdev_get_tx_queue(cdev->net, 0), cdev->tx_max_coalesced_frames); - cdev->can.state = CAN_STATE_ERROR_ACTIVE; + cdev->can.state = m_can_state_get_by_psr(cdev); m_can_enable_all_interrupts(cdev); -- cgit v1.2.3 From a9e30a22d6f23a2684c248871cad4c3061181639 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 12 Aug 2025 16:58:31 +0200 Subject: can: m_can: fix CAN state in system PM A suspend/resume cycle on a down interface results in the interface coming up in Error Active state. A suspend/resume cycle on an up interface will always result in Error Active state, regardless of the actual CAN state. During suspend, only set running interfaces to CAN_STATE_SLEEPING. During resume, only touch the CAN state of running interfaces. For wakeup sources, set the CAN state depending on the Protocol Status Register (PSR); for non-wakeup-source interfaces m_can_start() will do the same. Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Reviewed-by: Markus Schneider-Pargmann Link: https://patch.msgid.link/20250929-m_can-fix-state-handling-v4-4-682b49b49d9a@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index b6db5b57241c..f2576e577058 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2503,12 +2503,11 @@ int m_can_class_suspend(struct device *dev) } m_can_clk_stop(cdev); + cdev->can.state = CAN_STATE_SLEEPING; } pinctrl_pm_select_sleep_state(dev); - cdev->can.state = CAN_STATE_SLEEPING; - return ret; } EXPORT_SYMBOL_GPL(m_can_class_suspend); @@ -2521,8 +2520,6 @@ int m_can_class_resume(struct device *dev) pinctrl_pm_select_default_state(dev); - cdev->can.state = CAN_STATE_ERROR_ACTIVE; - if (netif_running(ndev)) { ret = m_can_clk_start(cdev); if (ret) @@ -2540,6 +2537,8 @@ int m_can_class_resume(struct device *dev) if (cdev->ops->init) ret = cdev->ops->init(cdev); + cdev->can.state = m_can_state_get_by_psr(cdev); + m_can_write(cdev, M_CAN_IE, cdev->active_interrupts); } else { ret = m_can_start(ndev); -- cgit v1.2.3 From 15b3036045188f4da4ca62b2ed01b0f160252e9b Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Tue, 7 Oct 2025 15:32:08 +0530 Subject: drm/xe: Move declarations under conditional branch The xe_device_shutdown() function needed a few declarations that were only required under a specific condition.
This change moves those declarations to be within that conditional branch to avoid unnecessary declarations. Reviewed-by: Nitin Gote Link: https://patchwork.freedesktop.org/patch/msgid/20251007100208.1407021-1-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay --- drivers/gpu/drm/xe/xe_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 386940323630..ab0302d5f25f 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -986,12 +986,12 @@ void xe_device_remove(struct xe_device *xe) void xe_device_shutdown(struct xe_device *xe) { - struct xe_gt *gt; - u8 id; - drm_dbg(&xe->drm, "Shutting down device\n"); if (xe_driver_flr_disabled(xe)) { + struct xe_gt *gt; + u8 id; + xe_display_pm_shutdown(xe); xe_irq_suspend(xe); -- cgit v1.2.3 From 49836ff2f37dd6d52bfe3153c0bcbd96025a6100 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 9 Oct 2025 08:25:36 +0200 Subject: can: m_can: replace Dong Aisheng's old email address Dong Aisheng's old Freescale email is not valid anymore and bounces, replace it by the new NXP one. Reviewed-by: Dong Aisheng Link: https://patch.msgid.link/20251009-m_can-update-email-address-v1-1-30a268587f69@pengutronix.de Signed-off-by: Marc Kleine-Budde --- .mailmap | 1 + drivers/net/can/m_can/m_can.c | 4 ++-- drivers/net/can/m_can/m_can_platform.c | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.mailmap b/.mailmap index d30f9363a4c9..8160c62f11e9 100644 --- a/.mailmap +++ b/.mailmap @@ -227,6 +227,7 @@ Dmitry Safonov <0x7f454c46@gmail.com> Dmitry Safonov <0x7f454c46@gmail.com> Dmitry Safonov <0x7f454c46@gmail.com> Domen Puncer +Dong Aisheng Douglas Gilbert Drew Fustini diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index f2576e577058..ad4f577c1ef7 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // CAN bus driver for Bosch M_CAN controller // Copyright (C) 2014 Freescale Semiconductor, Inc. -// Dong Aisheng +// Dong Aisheng // Copyright (C) 2018-19 Texas Instruments Incorporated - http://www.ti.com/ /* Bosch M_CAN user manual can be obtained from: @@ -2556,7 +2556,7 @@ int m_can_class_resume(struct device *dev) } EXPORT_SYMBOL_GPL(m_can_class_resume); -MODULE_AUTHOR("Dong Aisheng "); +MODULE_AUTHOR("Dong Aisheng "); MODULE_AUTHOR("Dan Murphy "); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("CAN bus driver for Bosch M_CAN controller"); diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c index 057eaa7b8b4b..4a412add2b8d 100644 --- a/drivers/net/can/m_can/m_can_platform.c +++ b/drivers/net/can/m_can/m_can_platform.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // IOMapped CAN bus driver for Bosch M_CAN controller // Copyright (C) 2014 Freescale Semiconductor, Inc. 
-// Dong Aisheng +// Dong Aisheng // // Copyright (C) 2018-19 Texas Instruments Incorporated - http://www.ti.com/ @@ -236,7 +236,7 @@ static struct platform_driver m_can_plat_driver = { module_platform_driver(m_can_plat_driver); -MODULE_AUTHOR("Dong Aisheng "); +MODULE_AUTHOR("Dong Aisheng "); MODULE_AUTHOR("Dan Murphy "); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("M_CAN driver for IO Mapped Bosch controllers"); -- cgit v1.2.3 From b0607599b7d1a8372b6dcd790d52bd1aefe59845 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:44:59 -0700 Subject: drm/xe: Add NULL checks to scratch LRC allocation kmalloc() can fail, so the returned value must have a NULL check. This check should come immediately after the kmalloc() for clarity. v5: - Assert state->buffer in setup_bo if buffer is iomem (Tomasz) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index af09f70f6e78..2c6eae2de1f2 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1214,8 +1214,7 @@ static int setup_bo(struct bo_setup_state *state) ssize_t remain; if (state->lrc->bo->vmap.is_iomem) { - if (!state->buffer) - return -ENOMEM; + xe_gt_assert(state->hwe->gt, state->buffer); state->ptr = state->buffer; } else { state->ptr = state->lrc->bo->vmap.vaddr + state->offset; @@ -1303,8 +1302,11 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) u32 *buf = NULL; int ret; - if (lrc->bo->vmap.is_iomem) + if (lrc->bo->vmap.is_iomem) { buf = kmalloc(LRC_WA_BB_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + } ret = xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, buf); @@ -1347,8 +1349,11 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) if (xe_gt_WARN_ON(lrc->gt, !state.funcs)) return 0; - if (lrc->bo->vmap.is_iomem) + if (lrc->bo->vmap.is_iomem) { state.buffer = kmalloc(state.max_size, GFP_KERNEL); + if (!state.buffer) + return -ENOMEM; + } ret = setup_bo(&state); if (ret) { -- cgit v1.2.3 From a12f0bc764da3781da2019c60826f47a6d7ed64f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:00 -0700 Subject: drm/xe: Save off position in ring in which a job was programmed VF post-migration recovery needs to modify the ring with updated GGTT addresses for pending jobs. Save off the position in the ring at which a job was programmed to facilitate this.
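A rough sketch of how the saved position is meant to be consumed (the replay side lands later in this series; the exact statements here are illustrative, not the final code):

	/* at submission: remember where this job starts in the ring */
	job->ptrs[i].head = lrc->ring.tail;

	/* at VF post-migration replay: rewind the software tail so the
	 * re-emitted commands overwrite the stale ones in place
	 */
	q->lrc[i]->ring.tail = job->ptrs[i].head;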
v4: - s/VF resume/VF post-migration recovery (Tomasz) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_ring_ops.c | 23 +++++++++++++++++++---- drivers/gpu/drm/xe/xe_sched_job_types.h | 5 +++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index d71837773d6c..ac0c6dcffe15 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -245,12 +245,14 @@ static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc, - u64 batch_addr, u32 seqno) + u64 batch_addr, u32 *head, u32 seqno) { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); struct xe_gt *gt = job->q->gt; + *head = lrc->ring.tail; + i = emit_copy_timestamp(lrc, dw, i); if (job->ring_ops_flush_tlb) { @@ -296,7 +298,7 @@ static bool has_aux_ccs(struct xe_device *xe) } static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, - u64 batch_addr, u32 seqno) + u64 batch_addr, u32 *head, u32 seqno) { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); @@ -304,6 +306,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, struct xe_device *xe = gt_to_xe(gt); bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; + *head = lrc->ring.tail; + i = emit_copy_timestamp(lrc, dw, i); dw[i++] = preparser_disable(true); @@ -346,7 +350,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, static void __emit_job_gen12_render_compute(struct xe_sched_job *job, struct xe_lrc *lrc, - u64 batch_addr, u32 seqno) + u64 batch_addr, u32 *head, + u32 seqno) { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); @@ -355,6 +360,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); u32 mask_flags = 0; + *head = lrc->ring.tail; + i = emit_copy_timestamp(lrc, dw, i); dw[i++] = preparser_disable(true); @@ -396,11 +403,14 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, } static void emit_migration_job_gen12(struct xe_sched_job *job, - struct xe_lrc *lrc, u32 seqno) + struct xe_lrc *lrc, u32 *head, + u32 seqno) { u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc); u32 dw[MAX_JOB_SIZE_DW], i = 0; + *head = lrc->ring.tail; + i = emit_copy_timestamp(lrc, dw, i); i = emit_store_imm_ggtt(saddr, seqno, dw, i); @@ -434,6 +444,7 @@ static void emit_job_gen12_gsc(struct xe_sched_job *job) __emit_job_gen12_simple(job, job->q->lrc[0], job->ptrs[0].batch_addr, + &job->ptrs[0].head, xe_sched_job_lrc_seqno(job)); } @@ -443,6 +454,7 @@ static void emit_job_gen12_copy(struct xe_sched_job *job) if (xe_sched_job_is_migration(job->q)) { emit_migration_job_gen12(job, job->q->lrc[0], + &job->ptrs[0].head, xe_sched_job_lrc_seqno(job)); return; } @@ -450,6 +462,7 @@ static void emit_job_gen12_copy(struct xe_sched_job *job) for (i = 0; i < job->q->width; ++i) __emit_job_gen12_simple(job, job->q->lrc[i], job->ptrs[i].batch_addr, + &job->ptrs[i].head, xe_sched_job_lrc_seqno(job)); } @@ -461,6 +474,7 @@ static void emit_job_gen12_video(struct xe_sched_job *job) for (i = 0; i < job->q->width; ++i) __emit_job_gen12_video(job, job->q->lrc[i], 
job->ptrs[i].batch_addr, + &job->ptrs[i].head, xe_sched_job_lrc_seqno(job)); } @@ -471,6 +485,7 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job) for (i = 0; i < job->q->width; ++i) __emit_job_gen12_render_compute(job, job->q->lrc[i], job->ptrs[i].batch_addr, + &job->ptrs[i].head, xe_sched_job_lrc_seqno(job)); } diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index dbf260dded8d..7ce58765a34a 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -24,6 +24,11 @@ struct xe_job_ptrs { struct dma_fence_chain *chain_fence; /** @batch_addr: Batch buffer address. */ u64 batch_addr; + /** + * @head: The tail pointer of the LRC (so head pointer of job) when the + * job was submitted + */ + u32 head; }; /** -- cgit v1.2.3 From 7e1fe102c8517a402327c37685357fbe279b3278 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:01 -0700 Subject: drm/xe/guc: Track pending-enable source in submission state Add explicit tracking in the GuC submission state to record the source of a pending enable (TDR vs. queue resume path vs. submission). Disambiguating the origin lets the GuC submission state machine apply the correct recovery/replay behavior. This helps VF restore: when the device comes back, the state machine knows whether the pending enable stems from timeout recovery, from a queue resume sequence, or submission and can gate sequencing and fixups accordingly. v4: - Clarify commit message (Tomasz) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 16f78376f196..13746f32b231 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -69,6 +69,8 @@ exec_queue_to_guc(struct xe_exec_queue *q) #define EXEC_QUEUE_STATE_BANNED (1 << 9) #define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10) #define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11) +#define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 12) +#define EXEC_QUEUE_STATE_PENDING_TDR_EXIT (1 << 13) static bool exec_queue_registered(struct xe_exec_queue *q) { @@ -220,6 +222,36 @@ static void set_exec_queue_extra_ref(struct xe_exec_queue *q) atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); } +static bool __maybe_unused exec_queue_pending_resume(struct xe_exec_queue *q) +{ + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME; +} + +static void set_exec_queue_pending_resume(struct xe_exec_queue *q) +{ + atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); +} + +static void clear_exec_queue_pending_resume(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); +} + +static bool __maybe_unused exec_queue_pending_tdr_exit(struct xe_exec_queue *q) +{ + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_TDR_EXIT; +} + +static void set_exec_queue_pending_tdr_exit(struct xe_exec_queue *q) +{ + atomic_or(EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state); +} + +static void clear_exec_queue_pending_tdr_exit(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state); +} + static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) { return (atomic_read(&q->guc->state) & @@ -1334,6 +1366,7 @@ trigger_reset: return 
DRM_GPU_SCHED_STAT_RESET; sched_enable: + set_exec_queue_pending_tdr_exit(q); enable_scheduling(q); rearm: /* @@ -1493,6 +1526,7 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) clear_exec_queue_suspended(q); if (!exec_queue_enabled(q)) { q->guc->resume_time = RESUME_PENDING; + set_exec_queue_pending_resume(q); enable_scheduling(q); } } else { @@ -2065,6 +2099,8 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); q->guc->resume_time = ktime_get(); + clear_exec_queue_pending_resume(q); + clear_exec_queue_pending_tdr_exit(q); clear_exec_queue_pending_enable(q); smp_wmb(); wake_up_all(&guc->ct.wq); -- cgit v1.2.3 From f6375fb3aa9485d572d967c2e6ee1dde22f5ca34 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:02 -0700 Subject: drm/xe: Track LR jobs in DRM scheduler pending list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VF migration requires jobs to remain pending so they can be replayed after the VF comes back. Previously, LR job fences were intentionally signaled immediately after submission to avoid the risk of exporting them, as these fences do not naturally signal in a timely manner and could break dma-fence contracts. A side effect of this approach was that LR jobs were never added to the DRM scheduler’s pending list, preventing them from being tracked for later resubmission. We now avoid signaling LR job fences and ensure they are never exported; Xe already guards against exporting these internal fences. With that guarantee in place, we can safely track LR jobs in the scheduler’s pending list so they are eligible for resubmission during VF post-migration recovery (and similar recovery paths). An added benefit is that LR queues now gain the DRM scheduler’s built-in flow control over ring usage rather than rejecting new jobs in the exec IOCTL if the ring is full. 
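With LR jobs kept on the pending list, error propagation on queue teardown reduces to a plain walk of that list. The snippet below is the pattern added by the xe_guc_exec_queue_lr_cleanup() hunk further down, shown in isolation for readability:

	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(job, -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);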
v2: - Ensure DRM scheduler TDR doesn't run for LR jobs - Stack variable for killed_or_banned_or_wedged v4: - Clarify commit message (Tomasz) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-5-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec.c | 12 ++--------- drivers/gpu/drm/xe/xe_exec_queue.c | 19 ----------------- drivers/gpu/drm/xe/xe_exec_queue.h | 2 -- drivers/gpu/drm/xe/xe_guc_submit.c | 43 +++++++++++++++++++++++++------------- 4 files changed, 31 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 83897950f0da..0dc27476832b 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -124,7 +124,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct xe_validation_ctx ctx; struct xe_sched_job *job; struct xe_vm *vm; - bool write_locked, skip_retry = false; + bool write_locked; int err = 0; struct xe_hw_engine_group *group; enum xe_hw_engine_group_execution_mode mode, previous_mode; @@ -266,12 +266,6 @@ retry: goto err_exec; } - if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) { - err = -EWOULDBLOCK; /* Aliased to -EAGAIN */ - skip_retry = true; - goto err_exec; - } - if (xe_exec_queue_uses_pxp(q)) { err = xe_vm_validate_protected(q->vm); if (err) @@ -328,8 +322,6 @@ retry: xe_sched_job_init_user_fence(job, &syncs[i]); } - if (xe_exec_queue_is_lr(q)) - q->ring_ops->emit_job(job); if (!xe_vm_in_lr_mode(vm)) xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished); xe_sched_job_push(job); @@ -355,7 +347,7 @@ err_exec: xe_validation_ctx_fini(&ctx); err_unlock_list: up_read(&vm->lock); - if (err == -EAGAIN && !skip_retry) + if (err == -EAGAIN) goto retry; err_hw_exec_mode: if (mode == EXEC_MODE_DMA_FENCE) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index df82463b19f6..7621089a47fe 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -850,25 +850,6 @@ bool xe_exec_queue_is_lr(struct xe_exec_queue *q) !(q->flags & EXEC_QUEUE_FLAG_VM); } -static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q) -{ - return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1; -} - -/** - * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full - * @q: The exec_queue - * - * Return: True if the exec_queue's ring is full, false otherwise. - */ -bool xe_exec_queue_ring_full(struct xe_exec_queue *q) -{ - struct xe_lrc *lrc = q->lrc[0]; - s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES; - - return xe_exec_queue_num_job_inflight(q) >= max_job; -} - /** * xe_exec_queue_is_idle() - Whether an exec_queue is idle. 
* @q: The exec_queue diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 8821ceb838d0..a4dfbe858bda 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -64,8 +64,6 @@ static inline bool xe_exec_queue_uses_pxp(struct xe_exec_queue *q) bool xe_exec_queue_is_lr(struct xe_exec_queue *q); -bool xe_exec_queue_ring_full(struct xe_exec_queue *q); - bool xe_exec_queue_is_idle(struct xe_exec_queue *q); void xe_exec_queue_kill(struct xe_exec_queue *q); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 13746f32b231..3a534d93505f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -851,30 +851,31 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) struct xe_sched_job *job = to_xe_sched_job(drm_job); struct xe_exec_queue *q = job->q; struct xe_guc *guc = exec_queue_to_guc(q); - struct dma_fence *fence = NULL; - bool lr = xe_exec_queue_is_lr(q); + bool lr = xe_exec_queue_is_lr(q), killed_or_banned_or_wedged = + exec_queue_killed_or_banned_or_wedged(q); xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || exec_queue_banned(q) || exec_queue_suspended(q)); trace_xe_sched_job_run(job); - if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) { + if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) { if (!exec_queue_registered(q)) register_exec_queue(q, GUC_CONTEXT_NORMAL); - if (!lr) /* LR jobs are emitted in the exec IOCTL */ - q->ring_ops->emit_job(job); + q->ring_ops->emit_job(job); submit_exec_queue(q); } - if (lr) { - xe_sched_job_set_error(job, -EOPNOTSUPP); - dma_fence_put(job->fence); /* Drop ref from xe_sched_job_arm */ - } else { - fence = job->fence; - } + /* + * We don't care about job-fence ordering in LR VMs because these fences + * are never exported; they are used solely to keep jobs on the pending + * list. Once a queue enters an error state, there's no need to track + * them. 
+ */ + if (killed_or_banned_or_wedged && lr) + xe_sched_job_set_error(job, -ECANCELED); - return fence; + return job->fence; } static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) @@ -916,7 +917,8 @@ static void disable_scheduling_deregister(struct xe_guc *guc, xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n"); xe_sched_submission_start(sched); xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(sched); + if (!xe_exec_queue_is_lr(q)) + xe_sched_tdr_queue_imm(sched); return; } @@ -1008,6 +1010,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_gpu_scheduler *sched = &ge->sched; + struct xe_sched_job *job; bool wedged = false; xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); @@ -1058,7 +1061,16 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0])) xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id); + xe_hw_fence_irq_stop(q->fence_irq); + xe_sched_submission_start(sched); + + spin_lock(&sched->base.job_list_lock); + list_for_each_entry(job, &sched->base.pending_list, drm.list) + xe_sched_job_set_error(job, -ECANCELED); + spin_unlock(&sched->base.job_list_lock); + + xe_hw_fence_irq_start(q->fence_irq); } #define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100) @@ -1129,7 +1141,8 @@ static void enable_scheduling(struct xe_exec_queue *q) xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); set_exec_queue_banned(q); xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(&q->guc->sched); + if (!xe_exec_queue_is_lr(q)) + xe_sched_tdr_queue_imm(&q->guc->sched); } } @@ -1187,6 +1200,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int i = 0; bool wedged = false, skip_timeout_check; + xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_lr(q)); + /* * TDR has fired before free job worker. Common if exec queue * immediately closed after last fence signaled. Add back to pending -- cgit v1.2.3 From b00d1e3fc8b7693c89ddae2ce34dc804eeb2ce37 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:03 -0700 Subject: drm/xe: Return first unsignaled job from first pending job helper In all cases where the first pending job helper is called, we only want to retrieve the first unsignaled pending job, as this helper is used exclusively in recovery flows. It is possible for signaled jobs to remain in the pending list while the scheduler is stopped, so those should be skipped. Also, add kernel documentation to clarify this behavior.
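A minimal usage sketch; the caller shape is assumed here, and replay_from() is an invented placeholder for the recovery flow that follows later in this series:

	struct xe_sched_job *job = xe_sched_first_pending_job(sched);

	if (job)			/* signaled-but-unfreed jobs were skipped */
		replay_from(job);	/* placeholder, not a real function */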
v8: - Split out into own patch (Auld) Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20251008214532.3442967-6-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gpu_scheduler.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index e548b2aed95a..3a9ff78d9346 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -77,17 +77,30 @@ static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched, spin_unlock(&sched->base.job_list_lock); } +/** + * xe_sched_first_pending_job() - Find first pending job which is unsignaled + * @sched: Xe GPU scheduler + * + * Return first unsignaled job in pending list or NULL + */ static inline struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched) { - struct xe_sched_job *job; + struct xe_sched_job *job, *r_job = NULL; spin_lock(&sched->base.job_list_lock); - job = list_first_entry_or_null(&sched->base.pending_list, - struct xe_sched_job, drm.list); + list_for_each_entry(job, &sched->base.pending_list, drm.list) { + struct drm_sched_fence *s_fence = job->drm.s_fence; + struct dma_fence *hw_fence = s_fence->parent; + + if (hw_fence && !dma_fence_is_signaled(hw_fence)) { + r_job = job; + break; + } + } spin_unlock(&sched->base.job_list_lock); - return job; + return r_job; } static inline int -- cgit v1.2.3 From 807c42dd8028d71222dfce035c2e87aaecbf623f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:04 -0700 Subject: drm/xe: Don't change LRC ring head on job resubmission Now that we save the job's head during submission, it's no longer necessary to adjust the LRC ring head during resubmission. Instead, a software-based adjustment of the tail will overwrite the old jobs in place. For some odd reason, adjusting the LRC ring head didn't work on parallel queues, which was causing issues in our CI. v5: - Add comment in guc_exec_queue_start explaning why the function works (Auld) v7: - Only adjust first state on first unsignaled job (Auld) v8: - Break unsignaled job handling to separate patch (Auld) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-7-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 3a534d93505f..d123bdb63369 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -2008,11 +2008,25 @@ static void guc_exec_queue_start(struct xe_exec_queue *q) struct xe_gpu_scheduler *sched = &q->guc->sched; if (!exec_queue_killed_or_banned_or_wedged(q)) { + struct xe_sched_job *job = xe_sched_first_pending_job(sched); int i; trace_xe_exec_queue_resubmit(q); - for (i = 0; i < q->width; ++i) - xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail); + if (job) { + for (i = 0; i < q->width; ++i) { + /* + * The GuC context is unregistered at this point + * time, adjusting software ring tail ensures + * jobs are rewritten in original placement, + * adjusting LRC tail ensures the newly loaded + * GuC / contexts only view the LRC tail + * increasing as jobs are written out. 
+ */ + q->lrc[i]->ring.tail = job->ptrs[i].head; + xe_lrc_set_ring_tail(q->lrc[i], + xe_lrc_ring_head(q->lrc[i])); + } + } xe_sched_resubmit_jobs(sched); } -- cgit v1.2.3 From 0ca229da92bf29b59410d9fae77e5f304df6ab6b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:05 -0700 Subject: drm/xe: Make LRC W/A scratch buffer usage consistent The LRC W/A currently checks for LRC being iomem in some places, while in others it checks if the scratch buffer is non-NULL. This inconsistency causes issues with the VF post-migration recovery code, which blindly passes in a scratch buffer. This patch standardizes the check by consistently verifying whether the LRC is iomem to determine if the scratch buffer should be used. Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251008214532.3442967-8-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 2c6eae2de1f2..b5083c99dd50 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1247,7 +1247,7 @@ fail: static void finish_bo(struct bo_setup_state *state) { - if (!state->buffer) + if (!state->lrc->bo->vmap.is_iomem) return; xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap, -- cgit v1.2.3 From e1d2e2d878bf8a61481173c49e8fbe2cd57fe0bb Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:06 -0700 Subject: drm/xe/vf: Add xe_gt_recovery_pending helper Add xe_gt_recovery_pending helper. This helper serves as the singular point to determine whether a GT recovery is currently in progress. Expected callers include the GuC CT layer and the GuC submission layer. The result is atomically visible as soon as the vCPUs are unhalted and persists until VF recovery completes.
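The expected call-site shape is a simple early-out; this sketch mirrors the CT-layer user added later in this series:

	if (xe_gt_recovery_pending(gt))
		return -ECANCELED;	/* H2G send aborted, message treated as lost */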
v3: - Add GT layer xe_gt_recovery_inprogress (Michal) - Don't blow up in memirq not enabled (CI) - Add __memirq_received with clear argument (Michal) - xe_memirq_sw_int_0_irq_pending rename (Michal) - Use offset in xe_memirq_sw_int_0_irq_pending (Michal) v4: - Refactor xe_gt_recovery_inprogress logic around memirq (Michal) v5: - s/inprogress/pending (Michal) v7: - Fix typos, adjust comment (Michal) Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-9-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt.h | 13 +++++++++ drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 28 ++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 2 ++ drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 10 +++++++ drivers/gpu/drm/xe/xe_memirq.c | 48 ++++++++++++++++++++++++++++--- drivers/gpu/drm/xe/xe_memirq.h | 2 ++ 6 files changed, 99 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 41880979f4de..5df2ffe3ff83 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -12,6 +12,7 @@ #include "xe_device.h" #include "xe_device_types.h" +#include "xe_gt_sriov_vf.h" #include "xe_hw_engine.h" #define for_each_hw_engine(hwe__, gt__, id__) \ @@ -124,4 +125,16 @@ static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe) hwe->instance == gt->usm.reserved_bcs_instance; } +/** + * xe_gt_recovery_pending() - GT recovery pending + * @gt: the &xe_gt + * + * Return: True if GT recovery in pending, False otherwise + */ +static inline bool xe_gt_recovery_pending(struct xe_gt *gt) +{ + return IS_SRIOV_VF(gt_to_xe(gt)) && + xe_gt_sriov_vf_recovery_pending(gt); +} + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 0461d5513487..43cb5fd7b222 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -26,6 +26,7 @@ #include "xe_guc_hxg_helpers.h" #include "xe_guc_relay.h" #include "xe_lrc.h" +#include "xe_memirq.h" #include "xe_mmio.h" #include "xe_sriov.h" #include "xe_sriov_vf.h" @@ -776,6 +777,7 @@ void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); xe_gt_assert(gt, IS_SRIOV_VF(xe)); + xe_gt_assert(gt, xe_gt_sriov_vf_recovery_pending(gt)); set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags); /* @@ -1118,3 +1120,29 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "\thandshake:\t%u.%u\n", pf_version->major, pf_version->minor); } + +/** + * xe_gt_sriov_vf_recovery_pending() - VF post migration recovery pending + * @gt: the &xe_gt + * + * The return value of this function must be immediately visible upon vCPU + * unhalt and must persist until RESFIX_DONE is issued. This guarantee is + * currently implemented only for platforms that support memirq. If non-memirq + * platforms begin to support VF migration, this function will need to be + * updated accordingly. 
+ * + * Return: True if VF post migration recovery is pending, False otherwise + */ +bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt) +{ + struct xe_memirq *memirq = >_to_tile(gt)->memirq; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + /* early detection until recovery starts */ + if (xe_device_uses_memirq(gt_to_xe(gt)) && + xe_memirq_guc_sw_int_0_irq_pending(memirq, >->uc.guc)) + return true; + + return READ_ONCE(gt->sriov.vf.migration.recovery_inprogress); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index 0af1dc769fe0..b91ae857e983 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -25,6 +25,8 @@ void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt); int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt); void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); +bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt); + u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt); u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index 298dedf4b009..1dfef60ec044 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -46,6 +46,14 @@ struct xe_gt_sriov_vf_runtime { } *regs; }; +/** + * xe_gt_sriov_vf_migration - VF migration data. + */ +struct xe_gt_sriov_vf_migration { + /** @recovery_inprogress: VF post migration recovery in progress */ + bool recovery_inprogress; +}; + /** * struct xe_gt_sriov_vf - GT level VF virtualization data. */ @@ -58,6 +66,8 @@ struct xe_gt_sriov_vf { struct xe_gt_sriov_vf_selfconfig self_config; /** @runtime: runtime data retrieved from the PF. */ struct xe_gt_sriov_vf_runtime runtime; + /** @migration: migration data for the VF. 
*/ + struct xe_gt_sriov_vf_migration migration; }; #endif diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index 0affede05820..2ef9d9aab264 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -397,8 +397,9 @@ void xe_memirq_postinstall(struct xe_memirq *memirq) memirq_set_enable(memirq, true); } -static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector, - u16 offset, const char *name) +static bool __memirq_received(struct xe_memirq *memirq, + struct iosys_map *vector, u16 offset, + const char *name, bool clear) { u8 value; @@ -408,12 +409,26 @@ static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector, memirq_err_ratelimited(memirq, "Unexpected memirq value %#x from %s at %u\n", value, name, offset); - iosys_map_wr(vector, offset, u8, 0x00); + if (clear) + iosys_map_wr(vector, offset, u8, 0x00); } return value; } +static bool memirq_received_noclear(struct xe_memirq *memirq, + struct iosys_map *vector, + u16 offset, const char *name) +{ + return __memirq_received(memirq, vector, offset, name, false); +} + +static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector, + u16 offset, const char *name) +{ + return __memirq_received(memirq, vector, offset, name, true); +} + static void memirq_dispatch_engine(struct xe_memirq *memirq, struct iosys_map *status, struct xe_hw_engine *hwe) { @@ -433,8 +448,16 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); - if (memirq_received(memirq, status, ilog2(GUC_INTR_SW_INT_0), name)) + /* + * This is a software interrupt that must be cleared after it's consumed + * to avoid race conditions where xe_gt_sriov_vf_recovery_pending() + * returns false. + */ + if (memirq_received_noclear(memirq, status, ilog2(GUC_INTR_SW_INT_0), + name)) { xe_guc_irq_handler(guc, GUC_INTR_SW_INT_0); + iosys_map_wr(status, ilog2(GUC_INTR_SW_INT_0), u8, 0x00); + } } /** @@ -459,6 +482,23 @@ void xe_memirq_hwe_handler(struct xe_memirq *memirq, struct xe_hw_engine *hwe) } } +/** + * xe_memirq_guc_sw_int_0_irq_pending() - SW_INT_0 IRQ is pending + * @memirq: the &xe_memirq + * @guc: the &xe_guc to check for IRQ + * + * Return: True if SW_INT_0 IRQ is pending on @guc, False otherwise + */ +bool xe_memirq_guc_sw_int_0_irq_pending(struct xe_memirq *memirq, struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 offset = xe_gt_is_media_type(gt) ? ilog2(INTR_MGUC) : ilog2(INTR_GUC); + struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&memirq->status, offset * SZ_16); + + return memirq_received_noclear(memirq, &map, ilog2(GUC_INTR_SW_INT_0), + guc_name(guc)); +} + /** * xe_memirq_handler - The `Memory Based Interrupts`_ Handler. 
* @memirq: the &xe_memirq diff --git a/drivers/gpu/drm/xe/xe_memirq.h b/drivers/gpu/drm/xe/xe_memirq.h index 06130650e9d6..e25d2234ab87 100644 --- a/drivers/gpu/drm/xe/xe_memirq.h +++ b/drivers/gpu/drm/xe/xe_memirq.h @@ -25,4 +25,6 @@ void xe_memirq_handler(struct xe_memirq *memirq); int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc); +bool xe_memirq_guc_sw_int_0_irq_pending(struct xe_memirq *memirq, struct xe_guc *guc); + #endif -- cgit v1.2.3 From e1587f16609bb34dc821d28923a231797c05c0e0 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:07 -0700 Subject: drm/xe/vf: Make VF recovery run on per-GT worker VF recovery is a per-GT operation, so it makes sense to isolate it to a per-GT queue. Scheduling this operation on the same worker as the GT reset and TDR not only aligns with this design but also helps avoid race conditions, as those operations can also modify the queue state. v2: - Fix lockdep splat (Adam) - Use xe_sriov_vf_migration_supported helper v3: - Drop xe_gt_sriov_ prefix for private functions (Michal) - Drop message in xe_gt_sriov_vf_migration_init_early (Michal) - Logic rework in vf_post_migration_notify_resfix_done (Michal) - Rework init sequence layering (Michal) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-10-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 6 + drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 181 ++++++++++++++++++++-- drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 3 +- drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 7 + drivers/gpu/drm/xe/xe_sriov_vf.c | 240 ------------------------------ drivers/gpu/drm/xe/xe_sriov_vf.h | 1 - drivers/gpu/drm/xe/xe_sriov_vf_types.h | 4 - 7 files changed, 185 insertions(+), 257 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 8fc3a6929d6c..2bdc1cdf24a5 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -388,6 +388,12 @@ int xe_gt_init_early(struct xe_gt *gt) return err; } + if (IS_SRIOV_VF(gt_to_xe(gt))) { + err = xe_gt_sriov_vf_init_early(gt); + if (err) + return err; + } + xe_reg_sr_init(>->reg_sr, "GT", gt_to_xe(gt)); err = xe_wa_gt_init(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 43cb5fd7b222..93612ea5029f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -25,11 +25,15 @@ #include "xe_guc.h" #include "xe_guc_hxg_helpers.h" #include "xe_guc_relay.h" +#include "xe_guc_submit.h" +#include "xe_irq.h" #include "xe_lrc.h" #include "xe_memirq.h" #include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_sriov_vf.h" +#include "xe_tile_sriov_vf.h" #include "xe_uc_fw.h" #include "xe_wopcm.h" @@ -308,13 +312,13 @@ static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) } /** - * xe_gt_sriov_vf_notify_resfix_done - Notify GuC about resource fixups apply completed. + * vf_notify_resfix_done - Notify GuC about resource fixups apply completed. * @gt: the &xe_gt struct instance linked to target GuC * * Returns: 0 if the operation completed successfully, or a negative error * code otherwise. */ -int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt) +static int vf_notify_resfix_done(struct xe_gt *gt) { struct xe_guc *guc = >->uc.guc; int err; @@ -756,7 +760,7 @@ failed: * xe_gt_sriov_vf_default_lrcs_hwsp_rebase - Update GGTT references in HWSP of default LRCs. 
* @gt: the &xe_gt struct instance */ -void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt) +static void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt) { struct xe_hw_engine *hwe; enum xe_hw_engine_id id; @@ -765,6 +769,26 @@ void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt) xe_default_lrc_update_memirq_regs_with_address(hwe); } +static void vf_start_migration_recovery(struct xe_gt *gt) +{ + bool started; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + spin_lock(>->sriov.vf.migration.lock); + + if (!gt->sriov.vf.migration.recovery_queued) { + gt->sriov.vf.migration.recovery_queued = true; + WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true); + + started = queue_work(gt->ordered_wq, >->sriov.vf.migration.worker); + xe_gt_sriov_info(gt, "VF migration recovery %s\n", started ? + "scheduled" : "already in progress"); + } + + spin_unlock(>->sriov.vf.migration.lock); +} + /** * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery, * or just mark that a GuC is ready for it. @@ -779,15 +803,13 @@ void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt) xe_gt_assert(gt, IS_SRIOV_VF(xe)); xe_gt_assert(gt, xe_gt_sriov_vf_recovery_pending(gt)); - set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags); - /* - * We need to be certain that if all flags were set, at least one - * thread will notice that and schedule the recovery. - */ - smp_mb__after_atomic(); + if (!xe_sriov_vf_migration_supported(xe)) { + xe_gt_sriov_err(gt, "migration not supported\n"); + return; + } xe_gt_sriov_info(gt, "ready for recovery after migration\n"); - xe_sriov_vf_start_migration_recovery(xe); + vf_start_migration_recovery(gt); } static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor) @@ -1121,6 +1143,145 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) pf_version->major, pf_version->minor); } +static void vf_post_migration_shutdown(struct xe_gt *gt) +{ + int ret = 0; + + spin_lock_irq(>->sriov.vf.migration.lock); + gt->sriov.vf.migration.recovery_queued = false; + spin_unlock_irq(>->sriov.vf.migration.lock); + + xe_guc_submit_pause(>->uc.guc); + ret |= xe_guc_submit_reset_block(>->uc.guc); + + if (ret) + xe_gt_sriov_info(gt, "migration recovery encountered ongoing reset\n"); +} + +static size_t post_migration_scratch_size(struct xe_device *xe) +{ + return max(xe_lrc_reg_size(xe), LRC_WA_BB_SIZE); +} + +static int vf_post_migration_fixups(struct xe_gt *gt) +{ + s64 shift; + void *buf; + int err; + + buf = kmalloc(post_migration_scratch_size(gt_to_xe(gt)), GFP_ATOMIC); + if (!buf) + return -ENOMEM; + + err = xe_gt_sriov_vf_query_config(gt); + if (err) + goto out; + + shift = xe_gt_sriov_vf_ggtt_shift(gt); + if (shift) { + xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift); + xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt); + err = xe_guc_contexts_hwsp_rebase(>->uc.guc, buf); + if (err) + goto out; + } + +out: + kfree(buf); + return err; +} + +static void vf_post_migration_kickstart(struct xe_gt *gt) +{ + /* + * Make sure interrupts on the new HW are properly set. The GuC IRQ + * must be working at this point, since the recovery did started, + * but the rest was not enabled using the procedure from spec. 
+ */ + xe_irq_resume(gt_to_xe(gt)); + + xe_guc_submit_reset_unblock(>->uc.guc); + xe_guc_submit_unpause(>->uc.guc); +} + +static int vf_post_migration_notify_resfix_done(struct xe_gt *gt) +{ + bool skip_resfix = false; + + spin_lock_irq(>->sriov.vf.migration.lock); + if (gt->sriov.vf.migration.recovery_queued) { + skip_resfix = true; + xe_gt_sriov_dbg(gt, "another recovery imminent, resfix skipped\n"); + } else { + WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false); + } + spin_unlock_irq(>->sriov.vf.migration.lock); + + if (skip_resfix) + return -EAGAIN; + + return vf_notify_resfix_done(gt); +} + +static void vf_post_migration_recovery(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + xe_gt_sriov_dbg(gt, "migration recovery in progress\n"); + + xe_pm_runtime_get(xe); + vf_post_migration_shutdown(gt); + + if (!xe_sriov_vf_migration_supported(xe)) { + xe_gt_sriov_err(gt, "migration is not supported\n"); + err = -ENOTRECOVERABLE; + goto fail; + } + + err = vf_post_migration_fixups(gt); + if (err) + goto fail; + + vf_post_migration_kickstart(gt); + err = vf_post_migration_notify_resfix_done(gt); + if (err && err != -EAGAIN) + goto fail; + + xe_pm_runtime_put(xe); + xe_gt_sriov_notice(gt, "migration recovery ended\n"); + return; +fail: + xe_pm_runtime_put(xe); + xe_gt_sriov_err(gt, "migration recovery failed (%pe)\n", ERR_PTR(err)); + xe_device_declare_wedged(xe); +} + +static void migration_worker_func(struct work_struct *w) +{ + struct xe_gt *gt = container_of(w, struct xe_gt, + sriov.vf.migration.worker); + + vf_post_migration_recovery(gt); +} + +/** + * xe_gt_sriov_vf_init_early() - GT VF init early + * @gt: the &xe_gt + * + * Return 0 on success, errno on failure + */ +int xe_gt_sriov_vf_init_early(struct xe_gt *gt) +{ + if (!xe_sriov_vf_migration_supported(gt_to_xe(gt))) + return 0; + + spin_lock_init(>->sriov.vf.migration.lock); + INIT_WORK(>->sriov.vf.migration.worker, migration_worker_func); + + return 0; +} + /** * xe_gt_sriov_vf_recovery_pending() - VF post migration recovery pending * @gt: the &xe_gt diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index b91ae857e983..0adebf8aa419 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -21,10 +21,9 @@ void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt, int xe_gt_sriov_vf_query_config(struct xe_gt *gt); int xe_gt_sriov_vf_connect(struct xe_gt *gt); int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); -void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt); -int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt); void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); +int xe_gt_sriov_vf_init_early(struct xe_gt *gt); bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt); u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index 1dfef60ec044..b2c8e8c89c30 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -7,6 +7,7 @@ #define _XE_GT_SRIOV_VF_TYPES_H_ #include +#include #include "xe_uc_fw_types.h" /** @@ -50,6 +51,12 @@ struct xe_gt_sriov_vf_runtime { * xe_gt_sriov_vf_migration - VF migration data. 
*/ struct xe_gt_sriov_vf_migration { + /** @migration: VF migration recovery worker */ + struct work_struct worker; + /** @lock: Protects recovery_queued */ + spinlock_t lock; + /** @recovery_queued: VF post migration recovery in queued */ + bool recovery_queued; /** @recovery_inprogress: VF post migration recovery in progress */ bool recovery_inprogress; }; diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index c1830ec8f0fd..911d5720917b 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -6,21 +6,12 @@ #include #include -#include "xe_assert.h" -#include "xe_device.h" #include "xe_gt.h" -#include "xe_gt_sriov_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" -#include "xe_guc_submit.h" -#include "xe_irq.h" -#include "xe_lrc.h" -#include "xe_pm.h" -#include "xe_sriov.h" #include "xe_sriov_printk.h" #include "xe_sriov_vf.h" #include "xe_sriov_vf_ccs.h" -#include "xe_tile_sriov_vf.h" /** * DOC: VF restore procedure in PF KMD and VF KMD @@ -158,8 +149,6 @@ static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...) xe->sriov.vf.migration.enabled = false; } -static void migration_worker_func(struct work_struct *w); - static void vf_migration_init_early(struct xe_device *xe) { /* @@ -184,8 +173,6 @@ static void vf_migration_init_early(struct xe_device *xe) guc_version.major, guc_version.minor); } - INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); - xe->sriov.vf.migration.enabled = true; xe_sriov_dbg(xe, "migration support enabled\n"); } @@ -199,233 +186,6 @@ void xe_sriov_vf_init_early(struct xe_device *xe) vf_migration_init_early(xe); } -/** - * vf_post_migration_shutdown - Stop the driver activities after VF migration. - * @xe: the &xe_device struct instance - * - * After this VM is migrated and assigned to a new VF, it is running on a new - * hardware, and therefore many hardware-dependent states and related structures - * require fixups. Without fixups, the hardware cannot do any work, and therefore - * all GPU pipelines are stalled. - * Stop some of kernel activities to make the fixup process faster. - */ -static void vf_post_migration_shutdown(struct xe_device *xe) -{ - struct xe_gt *gt; - unsigned int id; - int ret = 0; - - for_each_gt(gt, xe, id) { - xe_guc_submit_pause(>->uc.guc); - ret |= xe_guc_submit_reset_block(>->uc.guc); - } - - if (ret) - drm_info(&xe->drm, "migration recovery encountered ongoing reset\n"); -} - -/** - * vf_post_migration_kickstart - Re-start the driver activities under new hardware. - * @xe: the &xe_device struct instance - * - * After we have finished with all post-migration fixups, restart the driver - * activities to continue feeding the GPU with workloads. - */ -static void vf_post_migration_kickstart(struct xe_device *xe) -{ - struct xe_gt *gt; - unsigned int id; - - /* - * Make sure interrupts on the new HW are properly set. The GuC IRQ - * must be working at this point, since the recovery did started, - * but the rest was not enabled using the procedure from spec. - */ - xe_irq_resume(xe); - - for_each_gt(gt, xe, id) { - xe_guc_submit_reset_unblock(>->uc.guc); - xe_guc_submit_unpause(>->uc.guc); - } -} - -static bool gt_vf_post_migration_needed(struct xe_gt *gt) -{ - return test_bit(gt->info.id, >_to_xe(gt)->sriov.vf.migration.gt_flags); -} - -/* - * Notify GuCs marked in flags about resource fixups apply finished. 
- * @xe: the &xe_device struct instance - * @gt_flags: flags marking to which GTs the notification shall be sent - */ -static int vf_post_migration_notify_resfix_done(struct xe_device *xe, unsigned long gt_flags) -{ - struct xe_gt *gt; - unsigned int id; - int err = 0; - - for_each_gt(gt, xe, id) { - if (!test_bit(id, >_flags)) - continue; - /* skip asking GuC for RESFIX exit if new recovery request arrived */ - if (gt_vf_post_migration_needed(gt)) - continue; - err = xe_gt_sriov_vf_notify_resfix_done(gt); - if (err) - break; - clear_bit(id, >_flags); - } - - if (gt_flags && !err) - drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n"); - return err; -} - -static int vf_get_next_migrated_gt_id(struct xe_device *xe) -{ - struct xe_gt *gt; - unsigned int id; - - for_each_gt(gt, xe, id) { - if (test_and_clear_bit(id, &xe->sriov.vf.migration.gt_flags)) - return id; - } - return -1; -} - -static size_t post_migration_scratch_size(struct xe_device *xe) -{ - return max(xe_lrc_reg_size(xe), LRC_WA_BB_SIZE); -} - -/** - * Perform post-migration fixups on a single GT. - * - * After migration, GuC needs to be re-queried for VF configuration to check - * if it matches previous provisioning. Most of VF provisioning shall be the - * same, except GGTT range, since GGTT is not virtualized per-VF. If GGTT - * range has changed, we have to perform fixups - shift all GGTT references - * used anywhere within the driver. After the fixups in this function succeed, - * it is allowed to ask the GuC bound to this GT to continue normal operation. - * - * Returns: 0 if the operation completed successfully, or a negative error - * code otherwise. - */ -static int gt_vf_post_migration_fixups(struct xe_gt *gt) -{ - s64 shift; - void *buf; - int err; - - buf = kmalloc(post_migration_scratch_size(gt_to_xe(gt)), GFP_KERNEL); - if (!buf) - return -ENOMEM; - - err = xe_gt_sriov_vf_query_config(gt); - if (err) - goto out; - - shift = xe_gt_sriov_vf_ggtt_shift(gt); - if (shift) { - xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift); - xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt); - err = xe_guc_contexts_hwsp_rebase(>->uc.guc, buf); - if (err) - goto out; - } - -out: - kfree(buf); - return err; -} - -static void vf_post_migration_recovery(struct xe_device *xe) -{ - unsigned long fixed_gts = 0; - int id, err; - - drm_dbg(&xe->drm, "migration recovery in progress\n"); - xe_pm_runtime_get(xe); - vf_post_migration_shutdown(xe); - - if (!xe_sriov_vf_migration_supported(xe)) { - xe_sriov_err(xe, "migration is not supported\n"); - err = -ENOTRECOVERABLE; - goto fail; - } - - while (id = vf_get_next_migrated_gt_id(xe), id >= 0) { - struct xe_gt *gt = xe_device_get_gt(xe, id); - - err = gt_vf_post_migration_fixups(gt); - if (err) - goto fail; - - set_bit(id, &fixed_gts); - } - - vf_post_migration_kickstart(xe); - err = vf_post_migration_notify_resfix_done(xe, fixed_gts); - if (err) - goto fail; - - xe_pm_runtime_put(xe); - drm_notice(&xe->drm, "migration recovery ended\n"); - return; -fail: - xe_pm_runtime_put(xe); - drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err)); - xe_device_declare_wedged(xe); -} - -static void migration_worker_func(struct work_struct *w) -{ - struct xe_device *xe = container_of(w, struct xe_device, - sriov.vf.migration.worker); - - vf_post_migration_recovery(xe); -} - -/* - * Check if post-restore recovery is coming on any of GTs. - * @xe: the &xe_device struct instance - * - * Return: True if migration recovery worker will soon be running. 
Any worker currently - * executing does not affect the result. - */ -static bool vf_ready_to_recovery_on_any_gts(struct xe_device *xe) -{ - struct xe_gt *gt; - unsigned int id; - - for_each_gt(gt, xe, id) { - if (test_bit(id, &xe->sriov.vf.migration.gt_flags)) - return true; - } - return false; -} - -/** - * xe_sriov_vf_start_migration_recovery - Start VF migration recovery. - * @xe: the &xe_device to start recovery on - * - * This function shall be called only by VF. - */ -void xe_sriov_vf_start_migration_recovery(struct xe_device *xe) -{ - bool started; - - xe_assert(xe, IS_SRIOV_VF(xe)); - - if (!vf_ready_to_recovery_on_any_gts(xe)) - return; - - started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); - drm_info(&xe->drm, "VF migration recovery %s\n", started ? - "scheduled" : "already in progress"); -} - /** * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions. * @xe: the &xe_device to initialize diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h index 9e752105ec2a..4df95266b261 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf.h @@ -13,7 +13,6 @@ struct xe_device; void xe_sriov_vf_init_early(struct xe_device *xe); int xe_sriov_vf_init_late(struct xe_device *xe); -void xe_sriov_vf_start_migration_recovery(struct xe_device *xe); bool xe_sriov_vf_migration_supported(struct xe_device *xe); void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root); diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h index 426cc5841958..6a0fd0f5463e 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h @@ -33,10 +33,6 @@ struct xe_device_vf { /** @migration: VF Migration state data */ struct { - /** @migration.worker: VF migration recovery worker */ - struct work_struct worker; - /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ - unsigned long gt_flags; /** * @migration.enabled: flag indicating if migration support * was enabled or not due to missing prerequisites -- cgit v1.2.3 From 489d890a39131e86bcaf3f14ebc2570257c23356 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:08 -0700 Subject: drm/xe/vf: Abort H2G sends during VF post-migration recovery While VF post-migration recovery is in progress, abort H2G sends with -ECANCELED. These messages are treated as lost, and TLB invalidation errors are suppressed. During this phase, the H2G channel is down, and VF recovery requires the CT lock to proceed.
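For illustration, the behaviour in the send path can be condensed to the following sketch (taken from the __guc_ct_send_locked() hunk below; callers may treat the error as a lost-but-harmless send):

	/* H2G sends bail out while the CT is stopped or VF recovery is pending */
	if (ct->state == XE_GUC_CT_STATE_STOPPED || xe_gt_recovery_pending(gt)) {
		ret = -ECANCELED;	/* message is treated as lost */
		goto out;
	}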
v3: - Use xe_gt_recovery_inprogress (Michal) Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251008214532.3442967-11-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 47079ab9922c..9f0090ae64a6 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -851,7 +851,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) { - struct xe_gt *gt __maybe_unused = ct_to_gt(ct); + struct xe_gt *gt = ct_to_gt(ct); u16 seqno; int ret; @@ -872,7 +872,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, goto out; } - if (ct->state == XE_GUC_CT_STATE_STOPPED) { + if (ct->state == XE_GUC_CT_STATE_STOPPED || xe_gt_recovery_pending(gt)) { ret = -ECANCELED; goto out; } -- cgit v1.2.3 From 98e78e0c8b1bbb388d5ee702d2eb0013c2f9060a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:09 -0700 Subject: drm/xe/vf: Remove memory allocations from VF post migration recovery VF post-migration recovery is in the dma-fence signaling / reclaim path; avoid memory allocations in this path. v3: - s/lrc_wa_bb/scratch (Tomasz) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-12-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 23 +++++++++++++---------- drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 2 ++ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 93612ea5029f..737243fd3d40 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -1165,17 +1165,13 @@ static size_t post_migration_scratch_size(struct xe_device *xe) static int vf_post_migration_fixups(struct xe_gt *gt) { + void *buf = gt->sriov.vf.migration.scratch; s64 shift; - void *buf; int err; - buf = kmalloc(post_migration_scratch_size(gt_to_xe(gt)), GFP_ATOMIC); - if (!buf) - return -ENOMEM; - err = xe_gt_sriov_vf_query_config(gt); if (err) - goto out; + return err; shift = xe_gt_sriov_vf_ggtt_shift(gt); if (shift) { @@ -1183,12 +1179,10 @@ static int vf_post_migration_fixups(struct xe_gt *gt) xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt); err = xe_guc_contexts_hwsp_rebase(&gt->uc.guc, buf); if (err) - goto out; + return err; } - -out: - kfree(buf); - return err; + return 0; } static void vf_post_migration_kickstart(struct xe_gt *gt) @@ -1273,9 +1267,18 @@ static void migration_worker_func(struct work_struct *w) */ int xe_gt_sriov_vf_init_early(struct xe_gt *gt) { + void *buf; + if (!xe_sriov_vf_migration_supported(gt_to_xe(gt))) return 0; + buf = drmm_kmalloc(&gt_to_xe(gt)->drm, + post_migration_scratch_size(gt_to_xe(gt)), + GFP_KERNEL); + if (!buf) + return -ENOMEM; + + gt->sriov.vf.migration.scratch = buf; spin_lock_init(&gt->sriov.vf.migration.lock); INIT_WORK(&gt->sriov.vf.migration.worker, migration_worker_func); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index b2c8e8c89c30..e753646debc4 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -55,6 +55,8 @@ struct xe_gt_sriov_vf_migration { struct work_struct worker; /** @lock: Protects recovery_queued */ spinlock_t lock; + /** @scratch: Scratch memory for VF recovery */ +
void *scratch; /** @recovery_queued: VF post migration recovery is queued */ bool recovery_queued; /** @recovery_inprogress: VF post migration recovery in progress */ -- cgit v1.2.3 From cc9b24c6bb72d8da9309e2535840baa8450d4366 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:10 -0700 Subject: drm/xe: Move GGTT lock init to alloc The GGTT lock is needed very early during GT initialization for a VF; move the GGTT lock initialization to the allocation phase. v8: - Rework function structure (Michal) Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251008214532.3442967-13-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_ggtt.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7fdd0a97a628..aca7ae5489b9 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -159,6 +159,16 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) } } +static void primelockdep(struct xe_ggtt *ggtt) +{ + if (!IS_ENABLED(CONFIG_LOCKDEP)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&ggtt->lock); + fs_reclaim_release(GFP_KERNEL); +} + /** * xe_ggtt_alloc - Allocate a GGTT for a given &xe_tile * @tile: &xe_tile @@ -169,9 +179,19 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) */ struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile) { - struct xe_ggtt *ggtt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*ggtt), GFP_KERNEL); - if (ggtt) - ggtt->tile = tile; + struct xe_device *xe = tile_to_xe(tile); + struct xe_ggtt *ggtt; + + ggtt = drmm_kzalloc(&xe->drm, sizeof(*ggtt), GFP_KERNEL); + if (!ggtt) + return NULL; + + if (drmm_mutex_init(&xe->drm, &ggtt->lock)) + return NULL; + + primelockdep(ggtt); + ggtt->tile = tile; + return ggtt; } @@ -180,7 +200,6 @@ static void ggtt_fini_early(struct drm_device *drm, void *arg) { struct xe_ggtt *ggtt = arg; destroy_workqueue(ggtt->wq); - mutex_destroy(&ggtt->lock); drm_mm_takedown(&ggtt->mm); } @@ -198,16 +217,6 @@ void xe_ggtt_might_lock(struct xe_ggtt *ggtt) } #endif -static void primelockdep(struct xe_ggtt *ggtt) -{ - if (!IS_ENABLED(CONFIG_LOCKDEP)) - return; - - fs_reclaim_acquire(GFP_KERNEL); - might_lock(&ggtt->lock); - fs_reclaim_release(GFP_KERNEL); -} - static const struct xe_ggtt_pt_ops xelp_pt_ops = { .pte_encode_flags = xelp_ggtt_pte_flags, .ggtt_set_pte = xe_ggtt_set_pte, @@ -227,8 +236,6 @@ static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u32 reserved) { drm_mm_init(&ggtt->mm, reserved, ggtt->size - reserved); - mutex_init(&ggtt->lock); - primelockdep(ggtt); } int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size) -- cgit v1.2.3 From c6d00c60c4731604aa0aa6990c8d939e776fc732 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:11 -0700 Subject: drm/xe/vf: Move LMEM config to tile layer The VF's LMEM provisioning is tile-layer-specific information. Move the LMEM configuration to the tile layer accordingly.
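Keeping the LMEM size at the tile makes the per-tile VRAM offset computation a pure tile-layer affair; condensed from the xe_vram.c hunk below:

	/* VRAM offset of this tile = sum of LMEM assigned to lower-numbered tiles */
	offset = 0;
	for_each_tile(t, xe, id)
		for_each_if(t->id < tile->id)
			offset += xe_tile_sriov_vf_lmem(t);
	*tile_size = xe_tile_sriov_vf_lmem(tile);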
Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251008214532.3442967-14-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_device_types.h | 3 +++ drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 36 +++++++++-------------------- drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 2 -- drivers/gpu/drm/xe/xe_tile_sriov_vf.c | 33 ++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_tile_sriov_vf.h | 2 ++ drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h | 19 +++++++++++++++ drivers/gpu/drm/xe/xe_vram.c | 6 ++--- 7 files changed, 71 insertions(+), 30 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 989244418950..54d4034659cb 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -27,6 +27,7 @@ #include "xe_sriov_vf_ccs_types.h" #include "xe_step_types.h" #include "xe_survivability_mode_types.h" +#include "xe_tile_sriov_vf_types.h" #include "xe_validation.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) @@ -193,6 +194,8 @@ struct xe_tile { struct { /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ struct xe_ggtt_node *ggtt_balloon[2]; + /** @sriov.vf.self_config: VF configuration data */ + struct xe_tile_sriov_vf_selfconfig self_config; } vf; } sriov; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 737243fd3d40..ace670a16f53 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -471,10 +471,10 @@ static int vf_get_ggtt_info(struct xe_gt *gt) static int vf_get_lmem_info(struct xe_gt *gt) { - struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config; + struct xe_tile *tile = gt_to_tile(gt); struct xe_guc *guc = &gt->uc.guc; char size_str[10]; - u64 size; + u64 size, lmem_size; int err; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); @@ -483,18 +483,19 @@ static int vf_get_lmem_info(struct xe_gt *gt) if (unlikely(err)) return err; - if (config->lmem_size && config->lmem_size != size) { + lmem_size = xe_tile_sriov_vf_lmem(tile); + if (lmem_size && lmem_size != size) { xe_gt_sriov_err(gt, "Unexpected LMEM reassignment: %lluM != %lluM\n", - size / SZ_1M, config->lmem_size / SZ_1M); + size / SZ_1M, lmem_size / SZ_1M); return -EREMCHG; } string_get_size(size, 1, STRING_UNITS_2, size_str, sizeof(size_str)); xe_gt_sriov_dbg_verbose(gt, "LMEM %lluM %s\n", size / SZ_1M, size_str); - config->lmem_size = size; + xe_tile_sriov_vf_lmem_store(tile, size); - return config->lmem_size ? 0 : -ENODATA; + return size ? 0 : -ENODATA; } static int vf_get_submission_cfg(struct xe_gt *gt) @@ -591,23 +592,6 @@ u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt) return gt->sriov.vf.self_config.num_ctxs; } -/** - * xe_gt_sriov_vf_lmem - VF LMEM configuration. - * @gt: the &xe_gt - * - * This function is for VF use only. - * - * Return: size of the LMEM assigned to VF. - */ -u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt) -{ - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, gt->sriov.vf.guc_version.major); - xe_gt_assert(gt, gt->sriov.vf.self_config.lmem_size); - - return gt->sriov.vf.self_config.lmem_size; -} - /** * xe_gt_sriov_vf_ggtt - VF GGTT configuration.
* @gt: the &xe_gt @@ -1048,6 +1048,7 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) { struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config; struct xe_device *xe = gt_to_xe(gt); + u64 lmem_size; char buf[10]; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); @@ -1078,8 +1063,9 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "GGTT shift on last restore:\t%lld\n", config->ggtt_shift); if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { - string_get_size(config->lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); - drm_printf(p, "LMEM size:\t%llu (%s)\n", config->lmem_size, buf); + lmem_size = xe_tile_sriov_vf_lmem(gt_to_tile(gt)); + string_get_size(lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "LMEM size:\t%llu (%s)\n", lmem_size, buf); } drm_printf(p, "GuC contexts:\t%u\n", config->num_ctxs); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index e753646debc4..aff76051c9bb 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -20,8 +20,6 @@ struct xe_gt_sriov_vf_selfconfig { u64 ggtt_size; /** @ggtt_shift: difference in ggtt_base on last migration */ s64 ggtt_shift; - /** @lmem_size: assigned size of the LMEM. */ - u64 lmem_size; /** @num_ctxs: assigned number of GuC submission context IDs. */ u16 num_ctxs; /** @num_dbs: assigned number of GuC doorbells IDs. */ diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.c b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c index f221dbed16f0..02430a53da9f 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c @@ -252,3 +252,36 @@ void xe_tile_sriov_vf_fixup_ggtt_nodes(struct xe_tile *tile, s64 shift) mutex_unlock(&ggtt->lock); } + +/** + * xe_tile_sriov_vf_lmem - VF LMEM configuration. + * @tile: the &xe_tile + * + * This function is for VF use only. + * + * Return: size of the LMEM assigned to VF. + */ +u64 xe_tile_sriov_vf_lmem(struct xe_tile *tile) +{ + struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + return config->lmem_size; +} + +/** + * xe_tile_sriov_vf_lmem_store - Store VF LMEM configuration + * @tile: the &xe_tile + * @lmem_size: VF LMEM size to store + * + * This function is for VF use only.
+ */ +void xe_tile_sriov_vf_lmem_store(struct xe_tile *tile, u64 lmem_size) +{ + struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + config->lmem_size = lmem_size; +} diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.h b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h index 93eb043171e8..86d750a57530 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h @@ -14,5 +14,7 @@ int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile); int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile); void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile); void xe_tile_sriov_vf_fixup_ggtt_nodes(struct xe_tile *tile, s64 shift); +u64 xe_tile_sriov_vf_lmem(struct xe_tile *tile); +void xe_tile_sriov_vf_lmem_store(struct xe_tile *tile, u64 lmem_size); #endif diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h new file mode 100644 index 000000000000..c3790b478af2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TILE_SRIOV_VF_TYPES_H_ +#define _XE_TILE_SRIOV_VF_TYPES_H_ + +#include + +/** + * struct xe_tile_sriov_vf_selfconfig - VF configuration data. + */ +struct xe_tile_sriov_vf_selfconfig { + /** @lmem_size: assigned size of the LMEM. */ + u64 lmem_size; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index 7adfccf68e4c..70bcbb188867 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -17,10 +17,10 @@ #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt_mcr.h" -#include "xe_gt_sriov_vf.h" #include "xe_mmio.h" #include "xe_module.h" #include "xe_sriov.h" +#include "xe_tile_sriov_vf.h" #include "xe_ttm_vram_mgr.h" #include "xe_vram.h" #include "xe_vram_types.h" @@ -238,9 +238,9 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size, offset = 0; for_each_tile(t, xe, id) for_each_if(t->id < tile->id) - offset += xe_gt_sriov_vf_lmem(t->primary_gt); + offset += xe_tile_sriov_vf_lmem(t); - *tile_size = xe_gt_sriov_vf_lmem(gt); + *tile_size = xe_tile_sriov_vf_lmem(tile); *vram_size = *tile_size; *tile_offset = offset; -- cgit v1.2.3 From 7dd11d880456f591972eba173bade848359d6a94 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:12 -0700 Subject: drm/xe/vf: Close multi-GT GGTT shift race Multi-GT VF post-migration recovery can run in parallel on different workqueues, but both GTs point to the same GGTT, so only one GT needs to shift the GGTT. However, both GTs need to know when this step has completed. To coordinate this, perform the GGTT shift under the GGTT lock. With the shift done under the lock, storing the shift value becomes unnecessary. In addition to the above, move the GGTT VF config from the GT to the tile.
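Condensed, the coordination pattern in the vf_get_ggtt_info() hunk below is (the guard serializes both GTs on the shared GGTT; the shift != start test skips the very first config query, where no base had been cached yet):

	guard(mutex)(&ggtt->lock);
	/* ... query GGTT start/size from GuC ... */
	shift = start - (s64)xe_tile_sriov_vf_ggtt_base(tile);
	xe_tile_sriov_vf_ggtt_base_store(tile, start);
	xe_tile_sriov_vf_ggtt_store(tile, size);
	if (shift && shift != start)	/* shift once, under the lock */
		xe_tile_sriov_vf_fixup_ggtt_nodes_locked(gt_to_tile(gt), shift);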
v3: - Update commit message (Tomasz) v4: - Move GGTT values to tile state (Michal) - Use GGTT lock (Michal) v5: - Only take GGTT lock during recovery (CI) - Drop goto in vf_get_submission_cfg (Michal) - Add kernel doc around recovery in xe_gt_sriov_vf_query_config (Michal) v7: - Drop recovery variable (Michal) - Use _locked naming (Michal) - Use guard (Michal) v9: - Break LMEM changes into different patch (Michal) - Fix layering (Michal) Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251008214532.3442967-15-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 123 ++++++++++------------------ drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 3 - drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 6 -- drivers/gpu/drm/xe/xe_tile_sriov_vf.c | 81 ++++++++++++++++-- drivers/gpu/drm/xe/xe_tile_sriov_vf.h | 7 +- drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h | 4 + 6 files changed, 123 insertions(+), 101 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index ace670a16f53..a16f33fea4fc 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -438,13 +438,17 @@ u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt) static int vf_get_ggtt_info(struct xe_gt *gt) { - struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config; + struct xe_tile *tile = gt_to_tile(gt); + struct xe_ggtt *ggtt = tile->mem.ggtt; struct xe_guc *guc = &gt->uc.guc; - u64 start, size; + u64 start, size, ggtt_size; + s64 shift; int err; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + guard(mutex)(&ggtt->lock); + err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_START_KEY, &start); if (unlikely(err)) return err; @@ -453,20 +457,30 @@ static int vf_get_ggtt_info(struct xe_gt *gt) if (unlikely(err)) return err; - if (config->ggtt_size && config->ggtt_size != size) { + if (!size) + return -ENODATA; + + ggtt_size = xe_tile_sriov_vf_ggtt(tile); + if (ggtt_size && ggtt_size != size) { xe_gt_sriov_err(gt, "Unexpected GGTT reassignment: %lluK != %lluK\n", - size / SZ_1K, config->ggtt_size / SZ_1K); + size / SZ_1K, ggtt_size / SZ_1K); return -EREMCHG; } xe_gt_sriov_dbg_verbose(gt, "GGTT %#llx-%#llx = %lluK\n", start, start + size - 1, size / SZ_1K); - config->ggtt_shift = start - (s64)config->ggtt_base; - config->ggtt_base = start; - config->ggtt_size = size; + shift = start - (s64)xe_tile_sriov_vf_ggtt_base(tile); + xe_tile_sriov_vf_ggtt_base_store(tile, start); + xe_tile_sriov_vf_ggtt_store(tile, size); + + if (shift && shift != start) { + xe_gt_sriov_info(gt, "Shifting GGTT base by %lld to 0x%016llx\n", + shift, start); + xe_tile_sriov_vf_fixup_ggtt_nodes_locked(gt_to_tile(gt), shift); + } - return config->ggtt_size ? 0 : -ENODATA; + return 0; } static int vf_get_lmem_info(struct xe_gt *gt) @@ -546,7 +560,9 @@ static void vf_cache_gmdid(struct xe_gt *gt) * xe_gt_sriov_vf_query_config - Query SR-IOV config data over MMIO. * @gt: the &xe_gt * - * This function is for VF use only. + * This function is for VF use only. This function may shift the GGTT and is + * performed under GGTT lock, making this step visible to all GTs that share a + * GGTT. * * Return: 0 on success or a negative error code on failure. */ @@ -592,58 +608,6 @@ u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt) return gt->sriov.vf.self_config.num_ctxs; } -/** - * xe_gt_sriov_vf_ggtt - VF GGTT configuration. - * @gt: the &xe_gt - * - * This function is for VF use only. - * - * Return: size of the GGTT assigned to VF.
- */ -u64 xe_gt_sriov_vf_ggtt(struct xe_gt *gt) -{ - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, gt->sriov.vf.guc_version.major); - xe_gt_assert(gt, gt->sriov.vf.self_config.ggtt_size); - - return gt->sriov.vf.self_config.ggtt_size; -} - -/** - * xe_gt_sriov_vf_ggtt_base - VF GGTT base offset. - * @gt: the &xe_gt - * - * This function is for VF use only. - * - * Return: base offset of the GGTT assigned to VF. - */ -u64 xe_gt_sriov_vf_ggtt_base(struct xe_gt *gt) -{ - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, gt->sriov.vf.guc_version.major); - xe_gt_assert(gt, gt->sriov.vf.self_config.ggtt_size); - - return gt->sriov.vf.self_config.ggtt_base; -} - -/** - * xe_gt_sriov_vf_ggtt_shift - Return shift in GGTT range due to VF migration - * @gt: the &xe_gt struct instance - * - * This function is for VF use only. - * - * Return: The shift value; could be negative - */ -s64 xe_gt_sriov_vf_ggtt_shift(struct xe_gt *gt) -{ - struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config; - - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, xe_gt_is_main_type(gt)); - - return config->ggtt_shift; -} - static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) { u32 request[VF2PF_HANDSHAKE_REQUEST_MSG_LEN] = { @@ -1053,19 +1017,20 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - drm_printf(p, "GGTT range:\t%#llx-%#llx\n", - config->ggtt_base, - config->ggtt_base + config->ggtt_size - 1); - - string_get_size(config->ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf)); - drm_printf(p, "GGTT size:\t%llu (%s)\n", config->ggtt_size, buf); + if (xe_gt_is_main_type(gt)) { + u64 ggtt_size = xe_tile_sriov_vf_ggtt(gt_to_tile(gt)); + u64 ggtt_base = xe_tile_sriov_vf_ggtt_base(gt_to_tile(gt)); - drm_printf(p, "GGTT shift on last restore:\t%lld\n", config->ggtt_shift); + drm_printf(p, "GGTT range:\t%#llx-%#llx\n", + ggtt_base, ggtt_base + ggtt_size - 1); + string_get_size(ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "GGTT size:\t%llu (%s)\n", ggtt_size, buf); - if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { - lmem_size = xe_tile_sriov_vf_lmem(gt_to_tile(gt)); - string_get_size(lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); - drm_printf(p, "LMEM size:\t%llu (%s)\n", lmem_size, buf); + if (IS_DGFX(xe)) { + lmem_size = xe_tile_sriov_vf_lmem(gt_to_tile(gt)); + string_get_size(lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "LMEM size:\t%llu (%s)\n", lmem_size, buf); + } } drm_printf(p, "GuC contexts:\t%u\n", config->num_ctxs); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index 0adebf8aa419..2eb793a2d8ba 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
@@ -29,9 +29,6 @@ bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt); u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt); u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt); -u64 xe_gt_sriov_vf_ggtt(struct xe_gt *gt); -u64 xe_gt_sriov_vf_ggtt_base(struct xe_gt *gt); -s64 xe_gt_sriov_vf_ggtt_shift(struct xe_gt *gt); u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg); void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index aff76051c9bb..0d9e217989af 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -14,12 +14,6 @@ * struct xe_gt_sriov_vf_selfconfig - VF configuration data. */ struct xe_gt_sriov_vf_selfconfig { - /** @ggtt_base: assigned base offset of the GGTT region. */ - u64 ggtt_base; - /** @ggtt_size: assigned size of the GGTT region. */ - u64 ggtt_size; - /** @ggtt_shift: difference in ggtt_base on last migration */ - s64 ggtt_shift; /** @num_ctxs: assigned number of GuC submission context IDs. */ u16 num_ctxs; /** @num_dbs: assigned number of GuC doorbells IDs. */ diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.c b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c index 02430a53da9f..c9bac2cfdd04 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c @@ -9,7 +9,6 @@ #include "xe_assert.h" #include "xe_ggtt.h" -#include "xe_gt_sriov_vf.h" #include "xe_sriov.h" #include "xe_sriov_printk.h" #include "xe_tile_sriov_vf.h" @@ -40,10 +39,10 @@ static int vf_init_ggtt_balloons(struct xe_tile *tile) * * Return: 0 on success or a negative error code on failure. */ -int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile) +static int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile) { - u64 ggtt_base = xe_gt_sriov_vf_ggtt_base(tile->primary_gt); - u64 ggtt_size = xe_gt_sriov_vf_ggtt(tile->primary_gt); + u64 ggtt_base = tile->sriov.vf.self_config.ggtt_base; + u64 ggtt_size = tile->sriov.vf.self_config.ggtt_size; struct xe_device *xe = tile_to_xe(tile); u64 wopcm = xe_wopcm_size(xe); u64 start, end; @@ -232,7 +231,7 @@ int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile) */ /** - * xe_tile_sriov_vf_fixup_ggtt_nodes - Shift GGTT allocations to match assigned range. + * xe_tile_sriov_vf_fixup_ggtt_nodes_locked - Shift GGTT allocations to match assigned range. * @tile: the &xe_tile struct instance * @shift: the shift value * @@ -240,17 +239,15 @@ int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile) * within the global space. This range might have changed during migration, * which requires all memory addresses pointing to GGTT to be shifted. */ -void xe_tile_sriov_vf_fixup_ggtt_nodes(struct xe_tile *tile, s64 shift) +void xe_tile_sriov_vf_fixup_ggtt_nodes_locked(struct xe_tile *tile, s64 shift) { struct xe_ggtt *ggtt = tile->mem.ggtt; - mutex_lock(&ggtt->lock); + lockdep_assert_held(&ggtt->lock); xe_tile_sriov_vf_deballoon_ggtt_locked(tile); xe_ggtt_shift_nodes_locked(ggtt, shift); xe_tile_sriov_vf_balloon_ggtt_locked(tile); - - mutex_unlock(&ggtt->lock); } /** @@ -285,3 +282,69 @@ void xe_tile_sriov_vf_lmem_store(struct xe_tile *tile, u64 lmem_size) config->lmem_size = lmem_size; } + +/** + * xe_tile_sriov_vf_ggtt - VF GGTT configuration. + * @tile: the &xe_tile + * + * This function is for VF use only. + * + * Return: size of the GGTT assigned to VF. 
+ */ +u64 xe_tile_sriov_vf_ggtt(struct xe_tile *tile) +{ + struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + return config->ggtt_size; +} + +/** + * xe_tile_sriov_vf_ggtt_store - Store VF GGTT configuration + * @tile: the &xe_tile + * @ggtt_size: VF GGTT size to store + * + * This function is for VF use only. + */ +void xe_tile_sriov_vf_ggtt_store(struct xe_tile *tile, u64 ggtt_size) +{ + struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + config->ggtt_size = ggtt_size; +} + +/** + * xe_tile_sriov_vf_ggtt_base - VF GGTT base configuration. + * @tile: the &xe_tile + * + * This function is for VF use only. + * + * Return: base of the GGTT assigned to VF. + */ +u64 xe_tile_sriov_vf_ggtt_base(struct xe_tile *tile) +{ + struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + return config->ggtt_base; +} + +/** + * xe_tile_sriov_vf_ggtt_base_store - Store VF GGTT base configuration + * @tile: the &xe_tile + * @ggtt_base: VF GGTT base to store + * + * This function is for VF use only. + */ +void xe_tile_sriov_vf_ggtt_base_store(struct xe_tile *tile, u64 ggtt_base) +{ + struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + config->ggtt_base = ggtt_base; +} diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.h b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h index 86d750a57530..749f41504883 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h @@ -11,9 +11,12 @@ struct xe_tile; int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile); -int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile); void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile); -void xe_tile_sriov_vf_fixup_ggtt_nodes(struct xe_tile *tile, s64 shift); +void xe_tile_sriov_vf_fixup_ggtt_nodes_locked(struct xe_tile *tile, s64 shift); +u64 xe_tile_sriov_vf_ggtt(struct xe_tile *tile); +void xe_tile_sriov_vf_ggtt_store(struct xe_tile *tile, u64 ggtt_size); +u64 xe_tile_sriov_vf_ggtt_base(struct xe_tile *tile); +void xe_tile_sriov_vf_ggtt_base_store(struct xe_tile *tile, u64 ggtt_base); u64 xe_tile_sriov_vf_lmem(struct xe_tile *tile); void xe_tile_sriov_vf_lmem_store(struct xe_tile *tile, u64 lmem_size); #endif diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h index c3790b478af2..4807ca51614c 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h @@ -12,6 +12,10 @@ * struct xe_tile_sriov_vf_selfconfig - VF configuration data. */ struct xe_tile_sriov_vf_selfconfig { + /** @ggtt_base: assigned base offset of the GGTT region. */ + u64 ggtt_base; + /** @ggtt_size: assigned size of the GGTT region. */ + u64 ggtt_size; /** @lmem_size: assigned size of the LMEM. */ u64 lmem_size; }; -- cgit v1.2.3 From b47c0c07c35022c56a7ab4d3ce8a9c5f8cce8453 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:13 -0700 Subject: drm/xe/vf: Teardown VF post migration worker on driver unload Be cautious and ensure the VF post-migration worker is not running during driver unload.
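The shape of the teardown is a devm action that flips a flag under the migration lock and then flushes the worker, paired with a matching check before queueing new recovery work; condensed from the hunks below:

	static void vf_migration_fini(void *arg)
	{
		struct xe_gt *gt = arg;

		spin_lock_irq(&gt->sriov.vf.migration.lock);
		gt->sriov.vf.migration.recovery_teardown = true;
		spin_unlock_irq(&gt->sriov.vf.migration.lock);

		cancel_work_sync(&gt->sriov.vf.migration.worker);
	}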
v3: - More teardown later in driver init, use devm (Tomasz) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-16-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 6 ++++++ drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 34 ++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 1 + drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 4 +++- 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 2bdc1cdf24a5..d713b649447b 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -651,6 +651,12 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; + if (IS_SRIOV_VF(gt_to_xe(gt))) { + err = xe_gt_sriov_vf_init(gt); + if (err) + return err; + } + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index a16f33fea4fc..1aae4e40a366 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -725,7 +725,8 @@ static void vf_start_migration_recovery(struct xe_gt *gt) spin_lock(&gt->sriov.vf.migration.lock); - if (!gt->sriov.vf.migration.recovery_queued) { + if (!gt->sriov.vf.migration.recovery_queued && + !gt->sriov.vf.migration.recovery_teardown) { gt->sriov.vf.migration.recovery_queued = true; WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true); @@ -1206,6 +1207,17 @@ static void migration_worker_func(struct work_struct *w) vf_post_migration_recovery(gt); } +static void vf_migration_fini(void *arg) +{ + struct xe_gt *gt = arg; + + spin_lock_irq(&gt->sriov.vf.migration.lock); + gt->sriov.vf.migration.recovery_teardown = true; + spin_unlock_irq(&gt->sriov.vf.migration.lock); + + cancel_work_sync(&gt->sriov.vf.migration.worker); +} + /** * xe_gt_sriov_vf_init_early() - GT VF init early * @gt: the &xe_gt @@ -1232,6 +1244,26 @@ int xe_gt_sriov_vf_init_early(struct xe_gt *gt) return 0; } +/** + * xe_gt_sriov_vf_init() - GT VF init + * @gt: the &xe_gt + * + * Return: 0 on success, errno on failure + */ +int xe_gt_sriov_vf_init(struct xe_gt *gt) +{ + if (!xe_sriov_vf_migration_supported(gt_to_xe(gt))) + return 0; + + /* + * We want to tear down the VF post-migration worker early during driver + * unload; therefore, we add this finalization action later during + * driver load.
+ */ + return devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, + vf_migration_fini, gt); +} + /** * xe_gt_sriov_vf_recovery_pending() - VF post migration recovery pending * @gt: the &xe_gt diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index 2eb793a2d8ba..1d2eaa52f804 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -24,6 +24,7 @@ int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); int xe_gt_sriov_vf_init_early(struct xe_gt *gt); +int xe_gt_sriov_vf_init(struct xe_gt *gt); bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt); u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index 0d9e217989af..bdd9b968f204 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -45,10 +45,12 @@ struct xe_gt_sriov_vf_migration { /** @worker: VF migration recovery worker */ struct work_struct worker; - /** @lock: Protects recovery_queued */ + /** @lock: Protects recovery_queued, recovery_teardown */ spinlock_t lock; /** @scratch: Scratch memory for VF recovery */ void *scratch; + /** @recovery_teardown: VF post migration recovery is being torn down */ + bool recovery_teardown; /** @recovery_queued: VF post migration recovery is queued */ bool recovery_queued; /** @recovery_inprogress: VF post migration recovery in progress */ -- cgit v1.2.3 From f1029b9dde253f37c548f0c848022521a3b92732 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:14 -0700 Subject: drm/xe/vf: Don't allow GT reset to be queued during VF post migration recovery With well-behaved software, a GT reset should never occur, nor should it happen during VF post-migration recovery. If it does, trigger a warning but suppress the GT reset, as VF post-migration recovery is expected to bring the VF back to a working state.
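Condensed, the guard added to xe_guc_submit_reset_prepare() in the hunk below turns reset preparation into a warn-and-bail no-op while recovery is pending:

	if (xe_gt_WARN_ON(guc_to_gt(guc),
			  xe_gt_sriov_vf_recovery_pending(guc_to_gt(guc))))
		return 0;	/* recovery will restore the VF; skip the reset */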
v3: - Better commit message (Tomasz) v5: - Use xe_gt_WARN_ON (Michal) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-17-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 9 -------- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 7 ------- drivers/gpu/drm/xe/xe_guc_submit.c | 42 +++++-------------------------------- drivers/gpu/drm/xe/xe_guc_submit.h | 3 --- 4 files changed, 5 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index d713b649447b..6951fedd4350 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -803,11 +803,6 @@ static int do_gt_restart(struct xe_gt *gt) return 0; } -static int gt_wait_reset_unblock(struct xe_gt *gt) -{ - return xe_guc_wait_reset_unblock(&gt->uc.guc); -} - static int gt_reset(struct xe_gt *gt) { unsigned int fw_ref; @@ -822,10 +817,6 @@ static int gt_reset(struct xe_gt *gt) xe_gt_info(gt, "reset started\n"); - err = gt_wait_reset_unblock(gt); - if (!err) - xe_gt_warn(gt, "reset block failed to get lifted"); - xe_pm_runtime_get(gt_to_xe(gt)); if (xe_fault_inject_gt_reset()) { diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 1aae4e40a366..96c93a64d754 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -1097,17 +1097,11 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) static void vf_post_migration_shutdown(struct xe_gt *gt) { - int ret = 0; - spin_lock_irq(&gt->sriov.vf.migration.lock); gt->sriov.vf.migration.recovery_queued = false; spin_unlock_irq(&gt->sriov.vf.migration.lock); xe_guc_submit_pause(&gt->uc.guc); - ret |= xe_guc_submit_reset_block(&gt->uc.guc); - - if (ret) - xe_gt_sriov_info(gt, "migration recovery encountered ongoing reset\n"); } static size_t post_migration_scratch_size(struct xe_device *xe) @@ -1142,7 +1136,6 @@ static void vf_post_migration_kickstart(struct xe_gt *gt) */ xe_irq_resume(gt_to_xe(gt)); - xe_guc_submit_reset_unblock(&gt->uc.guc); xe_guc_submit_unpause(&gt->uc.guc); } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index d123bdb63369..59371b7cc8a4 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -27,6 +27,7 @@ #include "xe_gt.h" #include "xe_gt_clock.h" #include "xe_gt_printk.h" +#include "xe_gt_sriov_vf.h" #include "xe_guc.h" #include "xe_guc_capture.h" #include "xe_guc_ct.h" @@ -1900,47 +1901,14 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) } } -/** - * xe_guc_submit_reset_block - Disallow reset calls on given GuC. - * @guc: the &xe_guc struct instance - */ -int xe_guc_submit_reset_block(struct xe_guc *guc) -{ - return atomic_fetch_or(1, &guc->submission_state.reset_blocked); -} - -/** - * xe_guc_submit_reset_unblock - Allow back reset calls on given GuC. - * @guc: the &xe_guc struct instance - */ -void xe_guc_submit_reset_unblock(struct xe_guc *guc) -{ - atomic_set_release(&guc->submission_state.reset_blocked, 0); - wake_up_all(&guc->ct.wq); -} - -static int guc_submit_reset_is_blocked(struct xe_guc *guc) -{ - return atomic_read_acquire(&guc->submission_state.reset_blocked); -} - -/* Maximum time of blocking reset */ -#define RESET_BLOCK_PERIOD_MAX (HZ * 5) - -/** - * xe_guc_wait_reset_unblock - Wait until reset blocking flag is lifted, or timeout.
- * @guc: the &xe_guc struct instance - */ -int xe_guc_wait_reset_unblock(struct xe_guc *guc) -{ - return wait_event_timeout(guc->ct.wq, - !guc_submit_reset_is_blocked(guc), RESET_BLOCK_PERIOD_MAX); -} - int xe_guc_submit_reset_prepare(struct xe_guc *guc) { int ret; + if (xe_gt_WARN_ON(guc_to_gt(guc), + xe_gt_sriov_vf_recovery_pending(guc_to_gt(guc)))) + return 0; + if (!guc->submission_state.initialized) return 0; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 5b4a0a6fd818..f535fe3895e5 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -22,9 +22,6 @@ void xe_guc_submit_stop(struct xe_guc *guc); int xe_guc_submit_start(struct xe_guc *guc); void xe_guc_submit_pause(struct xe_guc *guc); void xe_guc_submit_unpause(struct xe_guc *guc); -int xe_guc_submit_reset_block(struct xe_guc *guc); -void xe_guc_submit_reset_unblock(struct xe_guc *guc); -int xe_guc_wait_reset_unblock(struct xe_guc *guc); void xe_guc_submit_wedge(struct xe_guc *guc); int xe_guc_read_stopped(struct xe_guc *guc); -- cgit v1.2.3 From a4dae94aad6ace04d396034e99a93b0e489b3da4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:15 -0700 Subject: drm/xe/vf: Wakeup in GuC backend on VF post migration recovery If VF post-migration recovery is in progress, the recovery flow will rebuild all GuC submission state. In this case, exit all waiters to ensure that submission queue scheduling can also be paused. Avoid taking any adverse actions after aborting the wait. As part of waking up the GuC backend, suspend_wait can now return -EAGAIN, indicating the wait should be retried. If the caller is running in a work item, that work item needs to be requeued to avoid a deadlock where it blocks the VF migration recovery work item. v3: - Don't block in preempt fence work queue as this can interfere with VF post-migration work queue scheduling leading to deadlock (Testing) - Use xe_gt_recovery_inprogress (Michal) v5: - Use static function for vf_recovery (Michal) - Add helper to wake CT waiters (Michal) - Move some code to following patch (Michal) - Adjust commit message to explain suspend_wait returning -EAGAIN (Michal) - Add kernel doc to suspend_wait around returning -EAGAIN v7: - Add comment on why a shared wait queue is needed on VFs (Michal) - Guard against suspend_wait signaling early on resfix done (Tomasz) v8: - Fix kernel doc (CI) Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251008214532.3442967-18-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec_queue_types.h | 3 ++ drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 4 ++ drivers/gpu/drm/xe/xe_guc_ct.h | 9 ++++ drivers/gpu/drm/xe/xe_guc_submit.c | 93 ++++++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_preempt_fence.c | 11 ++++ 5 files changed, 99 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 27b76cf9da89..282505fa1377 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -207,6 +207,9 @@ struct xe_exec_queue_ops { * call after suspend. In dma-fencing path thus must return within a * reasonable amount of time. -ETIME return shall indicate an error * waiting for suspend resulting in associated VM getting killed. + * -EAGAIN return indicates the wait should be tried again; if the wait + * is within a work item, the work item should be requeued as a deadlock + * avoidance mechanism.
*/ int (*suspend_wait)(struct xe_exec_queue *q); /** diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 96c93a64d754..b851285b8756 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -23,6 +23,7 @@ #include "xe_gt_sriov_vf.h" #include "xe_gt_sriov_vf_types.h" #include "xe_guc.h" +#include "xe_guc_ct.h" #include "xe_guc_hxg_helpers.h" #include "xe_guc_relay.h" #include "xe_guc_submit.h" @@ -729,6 +730,9 @@ static void vf_start_migration_recovery(struct xe_gt *gt) !gt->sriov.vf.migration.recovery_teardown) { gt->sriov.vf.migration.recovery_queued = true; WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true); + smp_wmb(); /* Ensure above write is visible before wake */ + + xe_guc_ct_wake_waiters(&gt->uc.guc.ct); started = queue_work(gt->ordered_wq, &gt->sriov.vf.migration.worker); xe_gt_sriov_info(gt, "VF migration recovery %s\n", started ? diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index d6c81325a76c..ae49364f6f28 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -72,4 +72,13 @@ xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len) long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct); +/** + * xe_guc_ct_wake_waiters() - GuC CT wake up waiters + * @ct: GuC CT object + */ +static inline void xe_guc_ct_wake_waiters(struct xe_guc_ct *ct) +{ + wake_up_all(&ct->wq); +} + #endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 59371b7cc8a4..7f0ea35f4f0a 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -27,7 +27,6 @@ #include "xe_gt.h" #include "xe_gt_clock.h" #include "xe_gt_printk.h" -#include "xe_gt_sriov_vf.h" #include "xe_guc.h" #include "xe_guc_capture.h" #include "xe_guc_ct.h" @@ -702,6 +701,11 @@ static u32 wq_space_until_wrap(struct xe_exec_queue *q) return (WQ_SIZE - q->guc->wqi_tail); } +static bool vf_recovery(struct xe_guc *guc) +{ + return xe_gt_recovery_pending(guc_to_gt(guc)); +} + static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) { struct xe_guc *guc = exec_queue_to_guc(q); @@ -711,7 +715,7 @@ static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) #define AVAILABLE_SPACE \ CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) - if (wqi_size > AVAILABLE_SPACE) { + if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) { try_again: q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); if (wqi_size > AVAILABLE_SPACE) { @@ -910,9 +914,10 @@ static void disable_scheduling_deregister(struct xe_guc *guc, ret = wait_event_timeout(guc->ct.wq, (!exec_queue_pending_enable(q) && !exec_queue_pending_disable(q)) || - xe_guc_read_stopped(guc), + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); - if (!ret) { + if (!ret && !vf_recovery(guc)) { struct xe_gpu_scheduler *sched = &q->guc->sched; xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n"); @@ -1015,6 +1020,10 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) bool wedged = false; xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); + + if (vf_recovery(guc)) + return; + trace_xe_exec_queue_lr_cleanup(q); if (!exec_queue_killed(q)) @@ -1047,7 +1056,11 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) */ ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_disable(q) || - xe_guc_read_stopped(guc), HZ * 5); + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); + if (vf_recovery(guc)) +
return; + if (!ret) { xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n", q->guc->id); @@ -1137,8 +1150,9 @@ static void enable_scheduling(struct xe_exec_queue *q) ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || - xe_guc_read_stopped(guc), HZ * 5); - if (!ret || xe_guc_read_stopped(guc)) { + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); + if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) { xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); set_exec_queue_banned(q); xe_gt_reset_async(q->gt); @@ -1209,7 +1223,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * list so job can be freed and kick scheduler ensuring free job is not * lost. */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) || + vf_recovery(guc)) return DRM_GPU_SCHED_STAT_NO_HANG; /* Kill the run_job entry point */ @@ -1261,7 +1276,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) ret = wait_event_timeout(guc->ct.wq, (!exec_queue_pending_enable(q) && !exec_queue_pending_disable(q)) || - xe_guc_read_stopped(guc), HZ * 5); + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); + if (vf_recovery(guc)) + goto handle_vf_resume; if (!ret || xe_guc_read_stopped(guc)) goto trigger_reset; @@ -1286,7 +1304,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) smp_rmb(); ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_disable(q) || - xe_guc_read_stopped(guc), HZ * 5); + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); + if (vf_recovery(guc)) + goto handle_vf_resume; if (!ret || xe_guc_read_stopped(guc)) { trigger_reset: if (!ret) @@ -1391,6 +1412,7 @@ rearm: * some thought, do this in a follow up. */ xe_sched_submission_start(sched); +handle_vf_resume: return DRM_GPU_SCHED_STAT_NO_HANG; } @@ -1487,11 +1509,24 @@ static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *ms static void __suspend_fence_signal(struct xe_exec_queue *q) { + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + if (!q->guc->suspend_pending) return; WRITE_ONCE(q->guc->suspend_pending, false); - wake_up(&q->guc->suspend_wait); + + /* + * We use a GuC shared wait queue for VFs because the VF resfix start + * interrupt must be able to wake all instances of suspend_wait. This + * prevents the VF migration worker from being starved during + * scheduling. 
+ */ + if (IS_SRIOV_VF(xe)) + wake_up_all(&guc->ct.wq); + else + wake_up(&q->guc->suspend_wait); } static void suspend_fence_signal(struct xe_exec_queue *q) @@ -1512,8 +1547,9 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && exec_queue_enabled(q)) { - wait_event(guc->ct.wq, (q->guc->resume_time != RESUME_PENDING || - xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)); + wait_event(guc->ct.wq, vf_recovery(guc) || + ((q->guc->resume_time != RESUME_PENDING || + xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q))); if (!xe_guc_read_stopped(guc)) { s64 since_resume_ms = @@ -1640,7 +1676,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) q->entity = &ge->entity; - if (xe_guc_read_stopped(guc)) + if (xe_guc_read_stopped(guc) || vf_recovery(guc)) xe_sched_stop(sched); mutex_unlock(&guc->submission_state.lock); @@ -1786,6 +1822,7 @@ static int guc_exec_queue_suspend(struct xe_exec_queue *q) static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); int ret; /* @@ -1793,11 +1830,21 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) * suspend_pending upon kill but to be paranoid but races in which * suspend_pending is set after kill also check kill here. */ - ret = wait_event_interruptible_timeout(q->guc->suspend_wait, - !READ_ONCE(q->guc->suspend_pending) || - exec_queue_killed(q) || - xe_guc_read_stopped(guc), - HZ * 5); +#define WAIT_COND \ + (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \ + xe_guc_read_stopped(guc)) + +retry: + if (IS_SRIOV_VF(xe)) + ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND || + vf_recovery(guc), + HZ * 5); + else + ret = wait_event_interruptible_timeout(q->guc->suspend_wait, + WAIT_COND, HZ * 5); + + if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc)))) + return -EAGAIN; if (!ret) { xe_gt_warn(guc_to_gt(guc), @@ -1805,8 +1852,13 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) q->guc->id); /* XXX: Trigger GT reset? */ return -ETIME; + } else if (IS_SRIOV_VF(xe) && !WAIT_COND) { + /* Corner case on RESFIX DONE where vf_recovery() changes */ + goto retry; } +#undef WAIT_COND + return ret < 0 ? 
ret : 0; } @@ -1905,8 +1957,7 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) { int ret; - if (xe_gt_WARN_ON(guc_to_gt(guc), - xe_gt_sriov_vf_recovery_pending(guc_to_gt(guc)))) + if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) return 0; if (!guc->submission_state.initialized) return 0; diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 83fbeea5aa20..7f587ca3947d 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -8,6 +8,8 @@ #include #include "xe_exec_queue.h" +#include "xe_gt_printk.h" +#include "xe_guc_exec_queue_types.h" #include "xe_vm.h" static void preempt_fence_work_func(struct work_struct *w) @@ -22,6 +24,15 @@ static void preempt_fence_work_func(struct work_struct *w) } else if (!q->ops->reset_status(q)) { int err = q->ops->suspend_wait(q); + if (err == -EAGAIN) { + xe_gt_dbg(q->gt, "PREEMPT FENCE RETRY guc_id=%d", + q->guc->id); + queue_work(q->vm->xe->preempt_fence_wq, + &pfence->preempt_work); + dma_fence_end_signalling(cookie); + return; + } + if (err) dma_fence_set_error(&pfence->base, err); } else { -- cgit v1.2.3 From 1faeeea056abb05a6513418e4cdb208326a67749 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:16 -0700 Subject: drm/xe/vf: Avoid indefinite blocking in preempt rebind worker for VFs supporting migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Blocking in work queues on a hardware action that may never occur, especially when it depends on a software fixup also scheduled on a work queue, is a recipe for deadlock. This situation arises with the preempt rebind worker and VF post-migration recovery. To prevent potential deadlocks, avoid indefinite blocking in the preempt rebind worker for VFs that support migration. v4: - Use dma_fence_wait_timeout (CI) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-19-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 4e914928e0a9..faca626702b8 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -35,6 +35,7 @@ #include "xe_pt.h" #include "xe_pxp.h" #include "xe_res_cursor.h" +#include "xe_sriov_vf.h" #include "xe_svm.h" #include "xe_sync.h" #include "xe_tile.h" @@ -111,12 +112,22 @@ static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, static int wait_for_existing_preempt_fences(struct xe_vm *vm) { struct xe_exec_queue *q; + bool vf_migration = IS_SRIOV_VF(vm->xe) && + xe_sriov_vf_migration_supported(vm->xe); + signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT; xe_vm_assert_held(vm); list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { if (q->lr.pfence) { - long timeout = dma_fence_wait(q->lr.pfence, false); + long timeout; + + timeout = dma_fence_wait_timeout(q->lr.pfence, false, + wait_time); + if (!timeout) { + xe_assert(vm->xe, vf_migration); + return -EAGAIN; + } /* Only -ETIME on fence indicates VM needs to be killed */ if (timeout < 0 || q->lr.pfence->error == -ETIME) @@ -541,6 +552,19 @@ out_unlock: out_unlock_outer: if (err == -EAGAIN) { trace_xe_vm_rebind_worker_retry(vm); + + /* + * We can't block in workers on a VF which supports migration + * given this can block the VF post-migration workers from + * getting scheduled.
+ */ + if (IS_SRIOV_VF(vm->xe) && + xe_sriov_vf_migration_supported(vm->xe)) { + up_write(&vm->lock); + xe_vm_queue_rebind_worker(vm); + return; + } + goto retry; } -- cgit v1.2.3 From 1f135a1ee9d2f80c56531971901ed87538f709b7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:17 -0700 Subject: drm/xe/vf: Use GUC_HXG_TYPE_EVENT for GuC context register The only case where the GuC submission backend cannot reason 100% correctly is when a GuC context is registered during VF post-migration recovery. In this scenario, it's possible that the GuC context register H2G is processed, but the immediately following schedule-enable H2G gets lost. The schedule-enable G2H "done" response is how the GuC state machine determines whether context registration has completed. A double register is harmless when using `GUC_HXG_TYPE_EVENT`, as GuC simply drops the duplicate H2G. To keep things simple, use `GUC_HXG_TYPE_EVENT` for all context registrations on VFs. v5: - Check for xe_sriov_vf_migration_supported (Tomasz) v7: - Add comment about subsequent protocol failures (Tomasz) - Modify commit message (Michal) Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-20-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 9f0090ae64a6..f63ce0cec357 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -32,6 +32,7 @@ #include "xe_guc_tlb_inval.h" #include "xe_map.h" #include "xe_pm.h" +#include "xe_sriov_vf.h" #include "xe_trace_guc.h" static void receive_g2h(struct xe_guc_ct *ct); @@ -736,6 +737,28 @@ static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence) return seqno; } +#define MAKE_ACTION(type, __action) \ +({ \ + FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) | \ + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | \ + GUC_HXG_EVENT_MSG_0_DATA0, __action); \ +}) + +static bool vf_action_can_safely_fail(struct xe_device *xe, u32 action) +{ + /* + * When resuming a VF, we can't reliably track whether context + * registration has completed in the GuC state machine. It is harmless + * to resend the request, as it will fail silently if GUC_HXG_TYPE_EVENT + * is used. Additionally, if there is an H2G protocol issue on a VF, + * subsequent H2G messages sent as GUC_HXG_TYPE_FAST_REQUEST will likely + * fail. 
+ */ + return IS_SRIOV_VF(xe) && xe_sriov_vf_migration_supported(xe) && + (action == XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC || + action == XE_GUC_ACTION_REGISTER_CONTEXT); +} + #define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, @@ -807,18 +830,14 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value); if (want_response) { - cmd[1] = - FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | - FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | - GUC_HXG_EVENT_MSG_0_DATA0, action[0]); + cmd[1] = MAKE_ACTION(GUC_HXG_TYPE_REQUEST, action[0]); + } else if (vf_action_can_safely_fail(xe, action[0])) { + cmd[1] = MAKE_ACTION(GUC_HXG_TYPE_EVENT, action[0]); } else { fast_req_track(ct, ct_fence_value, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0])); - cmd[1] = - FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) | - FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | - GUC_HXG_EVENT_MSG_0_DATA0, action[0]); + cmd[1] = MAKE_ACTION(GUC_HXG_TYPE_FAST_REQUEST, action[0]); } /* H2G header in cmd[1] replaces action[0] so: */ -- cgit v1.2.3 From 3061e8e0dd6dcabca9c1440abdf1f6ac3fe6bc74 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:18 -0700 Subject: drm/xe/vf: Flush and stop CTs in VF post migration recovery Flushing CTs (i.e., progressing all pending G2H messages) gives VF post-migration recovery an accurate view of which H2G messages the GuC has processed, enabling the GuC submission state machine to correctly rebuild all state. Also, stop all CT traffic, as the CT is not live during VF post-migration recovery. v3: - xe_guc_ct_flush_and_stop rename (Michal) - Drop extra GuC CT WQ wake up (Michal) Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251008214532.3442967-21-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 1 + drivers/gpu/drm/xe/xe_guc_ct.c | 10 ++++++++++ drivers/gpu/drm/xe/xe_guc_ct.h | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index b851285b8756..e6f6a5c8ef1b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -1105,6 +1105,7 @@ static void vf_post_migration_shutdown(struct xe_gt *gt) gt->sriov.vf.migration.recovery_queued = false; spin_unlock_irq(>->sriov.vf.migration.lock); + xe_guc_ct_flush_and_stop(>->uc.guc.ct); xe_guc_submit_pause(>->uc.guc); } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index f63ce0cec357..fd2385635962 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -574,6 +574,16 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct) stop_g2h_handler(ct); } +/** + * xe_guc_ct_flush_and_stop - Flush and stop all processing of G2H / H2G + * @ct: the &xe_guc_ct + */ +void xe_guc_ct_flush_and_stop(struct xe_guc_ct *ct) +{ + receive_g2h(ct); + xe_guc_ct_stop(ct); +} + /** * xe_guc_ct_stop - Set GuC to stopped state * @ct: the &xe_guc_ct diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index ae49364f6f28..f8370fa4727f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -17,6 +17,7 @@ int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct); int xe_guc_ct_enable(struct xe_guc_ct *ct); void xe_guc_ct_disable(struct xe_guc_ct *ct); void xe_guc_ct_stop(struct 
xe_guc_ct *ct); +void xe_guc_ct_flush_and_stop(struct xe_guc_ct *ct); void xe_guc_ct_fast_path(struct xe_guc_ct *ct); struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct); -- cgit v1.2.3 From 24687730cdc7c64dcccfb8c49e123f3fb00576c5 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:19 -0700 Subject: drm/xe/vf: Reset TLB invalidations during VF post migration recovery TLB invalidations requests can be lost during VF post-migration recovery. Since the VF has migrated, these invalidations are no longer needed. Reset the TLB invalidation frontend, which will signal all pending fences. v3: - Move TLB invalidation reset after pausing submission (Tomasz) - Adjust commit message (Michal) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-22-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index e6f6a5c8ef1b..75a20de71d8b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -35,6 +35,7 @@ #include "xe_sriov.h" #include "xe_sriov_vf.h" #include "xe_tile_sriov_vf.h" +#include "xe_tlb_inval.h" #include "xe_uc_fw.h" #include "xe_wopcm.h" @@ -1107,6 +1108,7 @@ static void vf_post_migration_shutdown(struct xe_gt *gt) xe_guc_ct_flush_and_stop(>->uc.guc.ct); xe_guc_submit_pause(>->uc.guc); + xe_tlb_inval_reset(>->tlb_inval); } static size_t post_migration_scratch_size(struct xe_device *xe) -- cgit v1.2.3 From fe3a615dadd398f73cde00a5ba32389958242dec Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:20 -0700 Subject: drm/xe/vf: Kickstart after resfix in VF post migration recovery GuC needs to be live for the GuC submission state machine to resubmit anything lost during VF post-migration recovery. Therefore, move the kickstart step after `resfix` to ensure proper resubmission. Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-23-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 75a20de71d8b..34fb8699b7a2 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -1136,13 +1136,6 @@ static int vf_post_migration_fixups(struct xe_gt *gt) static void vf_post_migration_kickstart(struct xe_gt *gt) { - /* - * Make sure interrupts on the new HW are properly set. The GuC IRQ - * must be working at this point, since the recovery did started, - * but the rest was not enabled using the procedure from spec. - */ - xe_irq_resume(gt_to_xe(gt)); - xe_guc_submit_unpause(>->uc.guc); } @@ -1162,6 +1155,13 @@ static int vf_post_migration_notify_resfix_done(struct xe_gt *gt) if (skip_resfix) return -EAGAIN; + /* + * Make sure interrupts on the new HW are properly set. The GuC IRQ + * must be working at this point, since the recovery did started, + * but the rest was not enabled using the procedure from spec. 
+ */ + xe_irq_resume(gt_to_xe(gt)); + return vf_notify_resfix_done(gt); } @@ -1185,11 +1185,12 @@ static void vf_post_migration_recovery(struct xe_gt *gt) if (err) goto fail; - vf_post_migration_kickstart(gt); err = vf_post_migration_notify_resfix_done(gt); if (err && err != -EAGAIN) goto fail; + vf_post_migration_kickstart(gt); + xe_pm_runtime_put(xe); xe_gt_sriov_notice(gt, "migration recovery ended\n"); return; -- cgit v1.2.3 From 4bdfb05305f3bbf6fe25f1f0dbc286c523bc326f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:21 -0700 Subject: drm/xe: Add CTB_H2G_BUFFER_OFFSET define Rather than open coding the H2G buffer offset as 'CTB_DESC_SIZE * 2' add CTB_H2G_BUFFER_OFFSET define. Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251008214532.3442967-24-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index fd2385635962..503cf5cb5d33 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -167,6 +167,7 @@ ct_to_xe(struct xe_guc_ct *ct) */ #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) +#define CTB_H2G_BUFFER_OFFSET (CTB_DESC_SIZE * 2) #define CTB_H2G_BUFFER_SIZE (SZ_4K) #define CTB_G2H_BUFFER_SIZE (SZ_128K) #define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2) @@ -190,7 +191,7 @@ long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct) static size_t guc_ct_size(void) { - return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + + return CTB_H2G_BUFFER_OFFSET + CTB_H2G_BUFFER_SIZE + CTB_G2H_BUFFER_SIZE; } @@ -331,7 +332,7 @@ static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g, h2g->desc = *map; xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); - h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2); + h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_H2G_BUFFER_OFFSET); } static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h, @@ -349,7 +350,7 @@ static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h, g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE); xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); - g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 + + g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_H2G_BUFFER_OFFSET + CTB_H2G_BUFFER_SIZE); } @@ -360,7 +361,7 @@ static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct) int err; desc_addr = xe_bo_ggtt_addr(ct->bo); - ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2; + ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_H2G_BUFFER_OFFSET; size = ct->ctbs.h2g.info.size * sizeof(u32); err = xe_guc_self_cfg64(guc, @@ -387,7 +388,7 @@ static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct) int err; desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE; - ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 + + ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_H2G_BUFFER_OFFSET + CTB_H2G_BUFFER_SIZE; size = ct->ctbs.g2h.info.size * sizeof(u32); -- cgit v1.2.3 From 16b6dd1a906f01b98caaf90d3cef2b1cefc15740 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:22 -0700 Subject: drm/xe/vf: Start CTs before resfix VF post migration recovery Before RESFIX_DONE, all CTs stuck in the H2G queue need to be squashed, as they may contain actions which contain invalid GGTT references or are unnecessary after HW change. Starting the CTs clears all H2Gs in the queue. 
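For orientation, this restart step slots into the overall recovery sequence roughly as follows; this is a condensed sketch assembled from the functions introduced across this series (error handling and the migration-support check omitted), not literal kernel code:

	/* Simplified outline of VF post-migration recovery ordering. */
	static void vf_post_migration_recovery_sketch(struct xe_gt *gt)
	{
		vf_post_migration_shutdown(gt);	/* flush + stop CTs, pause
						 * submission, reset TLB
						 * invalidations */
		vf_post_migration_fixups(gt);	/* GGTT / HWSP / context fixups */
		vf_post_migration_rearm(gt);	/* restart CTs with an empty
						 * H2G buffer */
		vf_post_migration_notify_resfix_done(gt); /* resume IRQs, tell
							   * GuC fixups done */
		vf_post_migration_kickstart(gt); /* unpause submission, replay
						  * lost H2Gs */
	}
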
Any lost H2Gs are resubmitted by the GuC submission state machine. v3: - Don't mess with head / tail values (Michal) v4: - Don't mess with broke (Michal) - Add CTB_H2G_BUFFER_OFFSET (Michal) v5: - Adjust commit message (Tomasz) Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251008214532.3442967-25-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 7 +++++ drivers/gpu/drm/xe/xe_guc_ct.c | 60 +++++++++++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_guc_ct.h | 1 + 3 files changed, 55 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 34fb8699b7a2..321178b6022a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -1134,6 +1134,11 @@ static int vf_post_migration_fixups(struct xe_gt *gt) return 0; } +static void vf_post_migration_rearm(struct xe_gt *gt) +{ + xe_guc_ct_restart(>->uc.guc.ct); +} + static void vf_post_migration_kickstart(struct xe_gt *gt) { xe_guc_submit_unpause(>->uc.guc); @@ -1185,6 +1190,8 @@ static void vf_post_migration_recovery(struct xe_gt *gt) if (err) goto fail; + vf_post_migration_rearm(gt); + err = vf_post_migration_notify_resfix_done(gt); if (err && err != -EAGAIN) goto fail; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 503cf5cb5d33..3472e4ea2609 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -502,7 +502,7 @@ static void ct_exit_safe_mode(struct xe_guc_ct *ct) xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode disabled\n"); } -int xe_guc_ct_enable(struct xe_guc_ct *ct) +static int __xe_guc_ct_start(struct xe_guc_ct *ct, bool needs_register) { struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); @@ -510,21 +510,29 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) xe_gt_assert(gt, !xe_guc_ct_enabled(ct)); - xe_map_memset(xe, &ct->bo->vmap, 0, 0, xe_bo_size(ct->bo)); - guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); - guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); + if (needs_register) { + xe_map_memset(xe, &ct->bo->vmap, 0, 0, xe_bo_size(ct->bo)); + guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); + guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); - err = guc_ct_ctb_h2g_register(ct); - if (err) - goto err_out; + err = guc_ct_ctb_h2g_register(ct); + if (err) + goto err_out; - err = guc_ct_ctb_g2h_register(ct); - if (err) - goto err_out; + err = guc_ct_ctb_g2h_register(ct); + if (err) + goto err_out; - err = guc_ct_control_toggle(ct, true); - if (err) - goto err_out; + err = guc_ct_control_toggle(ct, true); + if (err) + goto err_out; + } else { + ct->ctbs.h2g.info.broken = false; + ct->ctbs.g2h.info.broken = false; + /* Skip everything in H2G buffer */ + xe_map_memset(xe, &ct->bo->vmap, CTB_H2G_BUFFER_OFFSET, 0, + CTB_H2G_BUFFER_SIZE); + } guc_ct_change_state(ct, XE_GUC_CT_STATE_ENABLED); @@ -556,6 +564,32 @@ err_out: return err; } +/** + * xe_guc_ct_restart() - Restart GuC CT + * @ct: the &xe_guc_ct + * + * Restart GuC CT to an empty state without issuing a CT register MMIO command. + * + * Return: 0 on success, or a negative errno on failure. + */ +int xe_guc_ct_restart(struct xe_guc_ct *ct) +{ + return __xe_guc_ct_start(ct, false); +} + +/** + * xe_guc_ct_enable() - Enable GuC CT + * @ct: the &xe_guc_ct + * + * Enable GuC CT to an empty state and issue a CT register MMIO command. + * + * Return: 0 on success, or a negative errno on failure. 
+ */ +int xe_guc_ct_enable(struct xe_guc_ct *ct) +{ + return __xe_guc_ct_start(ct, true); +} + static void stop_g2h_handler(struct xe_guc_ct *ct) { cancel_work_sync(&ct->g2h_worker); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index f8370fa4727f..ca1ce2b3c354 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -15,6 +15,7 @@ int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct); int xe_guc_ct_init(struct xe_guc_ct *ct); int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct); int xe_guc_ct_enable(struct xe_guc_ct *ct); +int xe_guc_ct_restart(struct xe_guc_ct *ct); void xe_guc_ct_disable(struct xe_guc_ct *ct); void xe_guc_ct_stop(struct xe_guc_ct *ct); void xe_guc_ct_flush_and_stop(struct xe_guc_ct *ct); -- cgit v1.2.3 From 7c4b7e34c83bcde12d861cb28292622927839b85 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:23 -0700 Subject: drm/xe/vf: Abort VF post migration recovery on failure If VF post-migration recovery fails, the device is wedged. However, submission queues still need to be enabled for proper cleanup. In such cases, call into the GuC submission backend to restart all queues that were previously paused. v3: - s/Avort/Abort (Tomasz) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-26-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 10 ++++++++++ drivers/gpu/drm/xe/xe_guc_submit.c | 20 ++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_submit.h | 1 + 3 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 321178b6022a..3b6f56062e21 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -1144,6 +1144,15 @@ static void vf_post_migration_kickstart(struct xe_gt *gt) xe_guc_submit_unpause(>->uc.guc); } +static void vf_post_migration_abort(struct xe_gt *gt) +{ + spin_lock_irq(>->sriov.vf.migration.lock); + WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false); + spin_unlock_irq(>->sriov.vf.migration.lock); + + xe_guc_submit_pause_abort(>->uc.guc); +} + static int vf_post_migration_notify_resfix_done(struct xe_gt *gt) { bool skip_resfix = false; @@ -1202,6 +1211,7 @@ static void vf_post_migration_recovery(struct xe_gt *gt) xe_gt_sriov_notice(gt, "migration recovery ended\n"); return; fail: + vf_post_migration_abort(gt); xe_pm_runtime_put(xe); xe_gt_sriov_err(gt, "migration recovery failed (%pe)\n", ERR_PTR(err)); xe_device_declare_wedged(xe); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 7f0ea35f4f0a..be410a7126c7 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -2098,6 +2098,26 @@ void xe_guc_submit_unpause(struct xe_guc *guc) wake_up_all(&guc->ct.wq); } +/** + * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC. 
+ * @guc: the &xe_guc struct instance whose scheduler is to be aborted + */ +void xe_guc_submit_pause_abort(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + struct xe_gpu_scheduler *sched = &q->guc->sched; + + xe_sched_submission_start(sched); + if (exec_queue_killed_or_banned_or_wedged(q)) + xe_guc_exec_queue_trigger_cleanup(q); + } + mutex_unlock(&guc->submission_state.lock); +} + static struct xe_exec_queue * g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) { diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index f535fe3895e5..fe82c317048e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -22,6 +22,7 @@ void xe_guc_submit_stop(struct xe_guc *guc); int xe_guc_submit_start(struct xe_guc *guc); void xe_guc_submit_pause(struct xe_guc *guc); void xe_guc_submit_unpause(struct xe_guc *guc); +void xe_guc_submit_pause_abort(struct xe_guc *guc); void xe_guc_submit_wedge(struct xe_guc *guc); int xe_guc_read_stopped(struct xe_guc *guc); -- cgit v1.2.3 From c25c1010df88f6eec5c36344b6ceeb938d3020a9 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:24 -0700 Subject: drm/xe/vf: Replay GuC submission state on pause / unpause Fixup GuC submission pause / unpause functions to properly replay any possible state lost during VF post migration recovery. v3: - Add helpers for revert / replay (Tomasz) - Add comment around WQ NOPs (Tomasz) v7: - Only fixup / replay parallel queues once (Testing) - Skip unpause step on queues created after resfix done (Testing) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-27-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gpu_scheduler.c | 27 +-- drivers/gpu/drm/xe/xe_gpu_scheduler.h | 8 +- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 1 + drivers/gpu/drm/xe/xe_guc_exec_queue_types.h | 15 ++ drivers/gpu/drm/xe/xe_guc_submit.c | 271 +++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_guc_submit.h | 1 + drivers/gpu/drm/xe/xe_sched_job_types.h | 4 + 7 files changed, 295 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index 455ccaf17314..f91e06d03511 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -101,19 +101,6 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) cancel_work_sync(&sched->work_process_msg); } -/** - * xe_sched_submission_stop_async - Stop further runs of submission tasks on a scheduler. - * @sched: the &xe_gpu_scheduler struct instance - * - * This call disables further runs of scheduling work queue. It does not wait - * for any in-progress runs to finish, only makes sure no further runs happen - * afterwards. 
- */ -void xe_sched_submission_stop_async(struct xe_gpu_scheduler *sched) -{ - drm_sched_wqueue_stop(&sched->base); -} - void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched) { drm_sched_resume_timeout(&sched->base, sched->base.timeout); @@ -135,3 +122,17 @@ void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, list_add_tail(&msg->link, &sched->msgs); xe_sched_process_msg_queue(sched); } + +/** + * xe_sched_add_msg_head() - Xe GPU scheduler add message to head of list + * @sched: Xe GPU scheduler + * @msg: Message to add + */ +void xe_sched_add_msg_head(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg) +{ + lockdep_assert_held(&sched->base.job_list_lock); + + list_add(&msg->link, &sched->msgs); + xe_sched_process_msg_queue(sched); +} diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 3a9ff78d9346..9955397aaaa9 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -7,7 +7,7 @@ #define _XE_GPU_SCHEDULER_H_ #include "xe_gpu_scheduler_types.h" -#include "xe_sched_job_types.h" +#include "xe_sched_job.h" int xe_sched_init(struct xe_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, @@ -21,7 +21,6 @@ void xe_sched_fini(struct xe_gpu_scheduler *sched); void xe_sched_submission_start(struct xe_gpu_scheduler *sched); void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); -void xe_sched_submission_stop_async(struct xe_gpu_scheduler *sched); void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched); @@ -29,6 +28,8 @@ void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); +void xe_sched_add_msg_head(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg); static inline void xe_sched_msg_lock(struct xe_gpu_scheduler *sched) { @@ -58,7 +59,8 @@ static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched) struct drm_sched_fence *s_fence = s_job->s_fence; struct dma_fence *hw_fence = s_fence->parent; - if (hw_fence && !dma_fence_is_signaled(hw_fence)) + if (to_xe_sched_job(s_job)->skip_emit || + (hw_fence && !dma_fence_is_signaled(hw_fence))) sched->base.ops->run_job(s_job); } } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 3b6f56062e21..ced4884d11c3 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -1137,6 +1137,7 @@ static int vf_post_migration_fixups(struct xe_gt *gt) static void vf_post_migration_rearm(struct xe_gt *gt) { xe_guc_ct_restart(>->uc.guc.ct); + xe_guc_submit_unpause_prepare(>->uc.guc); } static void vf_post_migration_kickstart(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h index c30c0e3ccbbb..a3b034e4b205 100644 --- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -51,6 +51,21 @@ struct xe_guc_exec_queue { wait_queue_head_t suspend_wait; /** @suspend_pending: a suspend of the exec_queue is pending */ bool suspend_pending; + /** + * @needs_cleanup: Needs a cleanup message during VF post migration + * recovery. + */ + bool needs_cleanup; + /** + * @needs_suspend: Needs a suspend message during VF post migration + * recovery. + */ + bool needs_suspend; + /** + * @needs_resume: Needs a resume message during VF post migration + * recovery. 
+ */ + bool needs_resume; }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index be410a7126c7..0a9080bba50b 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -142,6 +142,11 @@ static void set_exec_queue_destroyed(struct xe_exec_queue *q) atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); } +static void clear_exec_queue_destroyed(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); +} + static bool exec_queue_banned(struct xe_exec_queue *q) { return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; @@ -222,7 +227,12 @@ static void set_exec_queue_extra_ref(struct xe_exec_queue *q) atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); } -static bool __maybe_unused exec_queue_pending_resume(struct xe_exec_queue *q) +static void clear_exec_queue_extra_ref(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); +} + +static bool exec_queue_pending_resume(struct xe_exec_queue *q) { return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME; } @@ -237,7 +247,7 @@ static void clear_exec_queue_pending_resume(struct xe_exec_queue *q) atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); } -static bool __maybe_unused exec_queue_pending_tdr_exit(struct xe_exec_queue *q) +static bool exec_queue_pending_tdr_exit(struct xe_exec_queue *q) { return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_TDR_EXIT; } @@ -799,7 +809,7 @@ static void wq_item_append(struct xe_exec_queue *q) } #define RESUME_PENDING ~0x0ull -static void submit_exec_queue(struct xe_exec_queue *q) +static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_lrc *lrc = q->lrc[0]; @@ -811,10 +821,13 @@ static void submit_exec_queue(struct xe_exec_queue *q) xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - if (xe_exec_queue_is_parallel(q)) - wq_item_append(q); - else - xe_lrc_set_ring_tail(lrc, lrc->ring.tail); + if (!job->skip_emit || job->last_replay) { + if (xe_exec_queue_is_parallel(q)) + wq_item_append(q); + else + xe_lrc_set_ring_tail(lrc, lrc->ring.tail); + job->last_replay = false; + } if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) return; @@ -867,8 +880,10 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) { if (!exec_queue_registered(q)) register_exec_queue(q, GUC_CONTEXT_NORMAL); - q->ring_ops->emit_job(job); - submit_exec_queue(q); + if (!job->skip_emit) + q->ring_ops->emit_job(job); + submit_exec_queue(q, job); + job->skip_emit = false; } /* @@ -1592,6 +1607,7 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) #define RESUME 4 #define OPCODE_MASK 0xf #define MSG_LOCKED BIT(8) +#define MSG_HEAD BIT(9) static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) { @@ -1716,12 +1732,24 @@ static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg msg->private_data = q; trace_xe_sched_msg_add(msg); - if (opcode & MSG_LOCKED) + if (opcode & MSG_HEAD) + xe_sched_add_msg_head(&q->guc->sched, msg); + else if (opcode & MSG_LOCKED) xe_sched_add_msg_locked(&q->guc->sched, msg); else xe_sched_add_msg(&q->guc->sched, msg); } +static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q, + struct xe_sched_msg *msg, + u32 opcode) +{ + if (!list_empty(&msg->link)) + return; + + guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | 
MSG_HEAD); +} + static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, u32 opcode) @@ -2009,6 +2037,105 @@ void xe_guc_submit_stop(struct xe_guc *guc) } +static void guc_exec_queue_revert_pending_state_change(struct xe_exec_queue *q) +{ + bool pending_enable, pending_disable, pending_resume; + + pending_enable = exec_queue_pending_enable(q); + pending_resume = exec_queue_pending_resume(q); + + if (pending_enable && pending_resume) + q->guc->needs_resume = true; + + if (pending_enable && !pending_resume && + !exec_queue_pending_tdr_exit(q)) { + clear_exec_queue_registered(q); + if (xe_exec_queue_is_lr(q)) + xe_exec_queue_put(q); + } + + if (pending_enable) { + clear_exec_queue_enabled(q); + clear_exec_queue_pending_resume(q); + clear_exec_queue_pending_tdr_exit(q); + clear_exec_queue_pending_enable(q); + } + + if (exec_queue_destroyed(q) && exec_queue_registered(q)) { + clear_exec_queue_destroyed(q); + if (exec_queue_extra_ref(q)) + xe_exec_queue_put(q); + else + q->guc->needs_cleanup = true; + clear_exec_queue_extra_ref(q); + } + + pending_disable = exec_queue_pending_disable(q); + + if (pending_disable && exec_queue_suspended(q)) { + clear_exec_queue_suspended(q); + q->guc->needs_suspend = true; + } + + if (pending_disable) { + if (!pending_enable) + set_exec_queue_enabled(q); + clear_exec_queue_pending_disable(q); + clear_exec_queue_check_timeout(q); + } + + q->guc->resume_time = 0; +} + +/* + * This function is quite complex, but it is the only real way to ensure no + * state is lost during VF resume flows. The function scans the queue state, + * makes adjustments as needed, and queues jobs / messages which are replayed + * upon unpause. + */ +static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + struct xe_sched_job *job; + int i; + + lockdep_assert_held(&guc->submission_state.lock); + + /* Stop scheduling + flush any DRM scheduler operations */ + xe_sched_submission_stop(sched); + if (xe_exec_queue_is_lr(q)) + cancel_work_sync(&q->guc->lr_tdr); + else + cancel_delayed_work_sync(&sched->base.work_tdr); + + guc_exec_queue_revert_pending_state_change(q); + + if (xe_exec_queue_is_parallel(q)) { + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); + + /* + * NOP existing WQ commands that may contain stale GGTT + * addresses. These will be replayed upon unpause. The hardware + * seems to get confused if the WQ head/tail pointers are + * adjusted. + */ + for (i = 0; i < WQ_SIZE / sizeof(u32); ++i) + parallel_write(xe, map, wq[i], + FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | + FIELD_PREP(WQ_LEN_MASK, 0)); + } + + job = xe_sched_first_pending_job(sched); + if (job) { + /* + * Adjust software tail so jobs submitted overwrite previous + * position in ring buffer with new GGTT addresses. + */ + for (i = 0; i < q->width; ++i) + q->lrc[i]->ring.tail = job->ptrs[i].head; + } +} + /** * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC. 
* @guc: the &xe_guc struct instance whose scheduler is to be disabled @@ -2018,8 +2145,17 @@ void xe_guc_submit_pause(struct xe_guc *guc) struct xe_exec_queue *q; unsigned long index; - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - xe_sched_submission_stop_async(&q->guc->sched); + xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + /* Prevent redundant attempts to stop parallel queues */ + if (q->guc->id != index) + continue; + + guc_exec_queue_pause(guc, q); + } + mutex_unlock(&guc->submission_state.lock); } static void guc_exec_queue_start(struct xe_exec_queue *q) @@ -2076,11 +2212,97 @@ int xe_guc_submit_start(struct xe_guc *guc) return 0; } -static void guc_exec_queue_unpause(struct xe_exec_queue *q) +static void guc_exec_queue_unpause_prepare(struct xe_guc *guc, + struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + struct drm_sched_job *s_job; + struct xe_sched_job *job = NULL; + + list_for_each_entry(s_job, &sched->base.pending_list, list) { + job = to_xe_sched_job(s_job); + + q->ring_ops->emit_job(job); + job->skip_emit = true; + } + + if (job) + job->last_replay = true; +} + +/** + * xe_guc_submit_unpause_prepare - Prepare unpause submission tasks on given GuC. + * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause + */ +void xe_guc_submit_unpause_prepare(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + /* Prevent redundant attempts to stop parallel queues */ + if (q->guc->id != index) + continue; + + guc_exec_queue_unpause_prepare(guc, q); + } + mutex_unlock(&guc->submission_state.lock); +} + +static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + struct xe_sched_msg *msg; + + if (q->guc->needs_cleanup) { + msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; + + guc_exec_queue_add_msg(q, msg, CLEANUP); + q->guc->needs_cleanup = false; + } + + if (q->guc->needs_suspend) { + msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; + + xe_sched_msg_lock(sched); + guc_exec_queue_try_add_msg_head(q, msg, SUSPEND); + xe_sched_msg_unlock(sched); + + q->guc->needs_suspend = false; + } + + /* + * The resume must be in the message queue before the suspend as it is + * not possible for a resume to be issued if a suspend is pending, but + * the inverse is possible. 
+ */ + if (q->guc->needs_resume) { + msg = q->guc->static_msgs + STATIC_MSG_RESUME; + + xe_sched_msg_lock(sched); + guc_exec_queue_try_add_msg_head(q, msg, RESUME); + xe_sched_msg_unlock(sched); + + q->guc->needs_resume = false; + } +} + +static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; + bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q); + + lockdep_assert_held(&guc->submission_state.lock); + xe_sched_resubmit_jobs(sched); + guc_exec_queue_replay_pending_state_change(q); xe_sched_submission_start(sched); + if (needs_tdr) + xe_guc_exec_queue_trigger_cleanup(q); + xe_sched_submission_resume_tdr(sched); } /** @@ -2092,10 +2314,19 @@ void xe_guc_submit_unpause(struct xe_guc *guc) struct xe_exec_queue *q; unsigned long index; - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - guc_exec_queue_unpause(q); + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + /* + * Prevent redundant attempts to stop parallel queues, or queues + * created after resfix done. + */ + if (q->guc->id != index || + !READ_ONCE(q->guc->sched.base.pause_submit)) + continue; - wake_up_all(&guc->ct.wq); + guc_exec_queue_unpause(guc, q); + } + mutex_unlock(&guc->submission_state.lock); } /** @@ -2111,6 +2342,10 @@ void xe_guc_submit_pause_abort(struct xe_guc *guc) xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { struct xe_gpu_scheduler *sched = &q->guc->sched; + /* Prevent redundant attempts to stop parallel queues */ + if (q->guc->id != index) + continue; + xe_sched_submission_start(sched); if (exec_queue_killed_or_banned_or_wedged(q)) xe_guc_exec_queue_trigger_cleanup(q); @@ -2696,6 +2931,10 @@ int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) mutex_lock(&guc->submission_state.lock); xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + /* Prevent redundant attempts to stop parallel queues */ + if (q->guc->id != index) + continue; + err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); if (err) break; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index fe82c317048e..b49a2748ec46 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -22,6 +22,7 @@ void xe_guc_submit_stop(struct xe_guc *guc); int xe_guc_submit_start(struct xe_guc *guc); void xe_guc_submit_pause(struct xe_guc *guc); void xe_guc_submit_unpause(struct xe_guc *guc); +void xe_guc_submit_unpause_prepare(struct xe_guc *guc); void xe_guc_submit_pause_abort(struct xe_guc *guc); void xe_guc_submit_wedge(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index 7ce58765a34a..13e7a12b03ad 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -63,6 +63,10 @@ struct xe_sched_job { bool ring_ops_flush_tlb; /** @ggtt: mapped in ggtt. */ bool ggtt; + /** @skip_emit: skip emitting the job */ + bool skip_emit; + /** @last_replay: last job being replayed */ + bool last_replay; /** @ptrs: per instance pointers. 
*/ struct xe_job_ptrs ptrs[]; }; -- cgit v1.2.3 From 3c1fa4aa60b146d1fa73b2b87064303f8e4b7952 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:25 -0700 Subject: drm/xe: Move queue init before LRC creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A queue must be in the submission backend's tracking state before the LRC is created to avoid a race condition where the LRC's GGTT addresses are not properly fixed up during VF post-migration recovery. Move the queue initialization—which adds the queue to the submission backend's tracking state—before LRC creation. Also wait on pending GGTT fixups before allocating LRCs to avoid racing with fixups. v2: - Wait on VF GGTT fixes before creating LRC (testing) v5: - Adjust comment in code (Tomasz) - Reduce race window v7: - Only wakeup waiters in recovery path (CI) - Wakeup waiters on abort - Use GT warn on (Michal) - Fix kernel doc for LRC ring size function (Tomasz) v8: - Guard against migration not supported or no memirq (CI) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-28-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 45 ++++++++++++++++++++++++------- drivers/gpu/drm/xe/xe_execlist.c | 2 +- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 45 ++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 2 ++ drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 5 ++++ drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- drivers/gpu/drm/xe/xe_lrc.h | 10 +++++++ 7 files changed, 98 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7621089a47fe..90cbc95f8e2e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -15,6 +15,7 @@ #include "xe_dep_scheduler.h" #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_sriov_vf.h" #include "xe_hw_engine_class_sysfs.h" #include "xe_hw_engine_group.h" #include "xe_hw_fence.h" @@ -205,17 +206,34 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL)) flags |= XE_LRC_CREATE_USER_CTX; + err = q->ops->init(q); + if (err) + return err; + + /* + * This must occur after q->ops->init to avoid race conditions during VF + * post-migration recovery, as the fixups for the LRC GGTT addresses + * depend on the queue being present in the backend tracking structure. + * + * In addition to above, we must wait on inflight GGTT changes to avoid + * writing out stale values here. Such wait provides a solid solution + * (without a race) only if the function can detect migration instantly + * from the moment vCPU resumes execution. 
+ */ for (i = 0; i < q->width; ++i) { - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags); - if (IS_ERR(q->lrc[i])) { - err = PTR_ERR(q->lrc[i]); + struct xe_lrc *lrc; + + xe_gt_sriov_vf_wait_valid_ggtt(q->gt); + lrc = xe_lrc_create(q->hwe, q->vm, xe_lrc_ring_size(), + q->msix_vec, flags); + if (IS_ERR(lrc)) { + err = PTR_ERR(lrc); goto err_lrc; } - } - err = q->ops->init(q); - if (err) - goto err_lrc; + /* Pairs with READ_ONCE to xe_exec_queue_contexts_hwsp_rebase */ + WRITE_ONCE(q->lrc[i], lrc); + } return 0; @@ -1121,9 +1139,16 @@ int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch) int err = 0; for (i = 0; i < q->width; ++i) { - xe_lrc_update_memirq_regs_with_address(q->lrc[i], q->hwe, scratch); - xe_lrc_update_hwctx_regs_with_address(q->lrc[i]); - err = xe_lrc_setup_wa_bb_with_scratch(q->lrc[i], q->hwe, scratch); + struct xe_lrc *lrc; + + /* Pairs with WRITE_ONCE in __xe_exec_queue_init */ + lrc = READ_ONCE(q->lrc[i]); + if (!lrc) + continue; + + xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch); + xe_lrc_update_hwctx_regs_with_address(lrc); + err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch); if (err) break; } diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index f83d421ac9d3..769d05517f93 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -339,7 +339,7 @@ static int execlist_exec_queue_init(struct xe_exec_queue *q) const struct drm_sched_init_args args = { .ops = &drm_sched_ops, .num_rqs = 1, - .credit_limit = q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, + .credit_limit = xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, .hang_limit = XE_SCHED_HANG_LIMIT, .timeout = XE_SCHED_JOB_TIMEOUT, .name = q->hwe->name, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index ced4884d11c3..edd98a4da306 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -482,6 +482,12 @@ static int vf_get_ggtt_info(struct xe_gt *gt) xe_tile_sriov_vf_fixup_ggtt_nodes_locked(gt_to_tile(gt), shift); } + if (xe_sriov_vf_migration_supported(gt_to_xe(gt))) { + WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false); + smp_wmb(); /* Ensure above write visible before wake */ + wake_up_all(>->sriov.vf.migration.wq); + } + return 0; } @@ -731,7 +737,8 @@ static void vf_start_migration_recovery(struct xe_gt *gt) !gt->sriov.vf.migration.recovery_teardown) { gt->sriov.vf.migration.recovery_queued = true; WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true); - smp_wmb(); /* Ensure above write visable before wake */ + WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, true); + smp_wmb(); /* Ensure above writes visable before wake */ xe_guc_ct_wake_waiters(>->uc.guc.ct); @@ -1149,8 +1156,11 @@ static void vf_post_migration_abort(struct xe_gt *gt) { spin_lock_irq(>->sriov.vf.migration.lock); WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false); + WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false); spin_unlock_irq(>->sriov.vf.migration.lock); + wake_up_all(>->sriov.vf.migration.wq); + xe_guc_submit_pause_abort(>->uc.guc); } @@ -1259,6 +1269,7 @@ int xe_gt_sriov_vf_init_early(struct xe_gt *gt) gt->sriov.vf.migration.scratch = buf; spin_lock_init(>->sriov.vf.migration.lock); INIT_WORK(>->sriov.vf.migration.worker, migration_worker_func); + init_waitqueue_head(>->sriov.vf.migration.wq); return 0; } @@ -1308,3 +1319,35 @@ bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt) return 
READ_ONCE(gt->sriov.vf.migration.recovery_inprogress); } + +static bool vf_valid_ggtt(struct xe_gt *gt) +{ + struct xe_memirq *memirq = >_to_tile(gt)->memirq; + bool irq_pending = xe_device_uses_memirq(gt_to_xe(gt)) && + xe_memirq_guc_sw_int_0_irq_pending(memirq, >->uc.guc); + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + if (irq_pending || READ_ONCE(gt->sriov.vf.migration.ggtt_need_fixes)) + return false; + + return true; +} + +/** + * xe_gt_sriov_vf_wait_valid_ggtt() - VF wait for valid GGTT addresses + * @gt: the &xe_gt + */ +void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt) +{ + int ret; + + if (!IS_SRIOV_VF(gt_to_xe(gt)) || + !xe_sriov_vf_migration_supported(gt_to_xe(gt))) + return; + + ret = wait_event_interruptible_timeout(gt->sriov.vf.migration.wq, + vf_valid_ggtt(gt), + HZ * 5); + xe_gt_WARN_ON(gt, !ret); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index 1d2eaa52f804..af40276790fa 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -38,4 +38,6 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p); void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p); void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt); + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index bdd9b968f204..420b0e6089de 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -7,6 +7,7 @@ #define _XE_GT_SRIOV_VF_TYPES_H_ #include +#include #include #include "xe_uc_fw_types.h" @@ -47,6 +48,8 @@ struct xe_gt_sriov_vf_migration { struct work_struct worker; /** @lock: Protects recovery_queued, teardown */ spinlock_t lock; + /** @wq: wait queue for migration fixes */ + wait_queue_head_t wq; /** @scratch: Scratch memory for VF recovery */ void *scratch; /** @recovery_teardown: VF post migration recovery is being torn down */ @@ -55,6 +58,8 @@ struct xe_gt_sriov_vf_migration { bool recovery_queued; /** @recovery_inprogress: VF post migration recovery in progress */ bool recovery_inprogress; + /** @ggtt_need_fixes: VF GGTT needs fixes */ + bool ggtt_need_fixes; }; /** diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 0a9080bba50b..b4c09604c5ea 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1670,7 +1670,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? 
MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(q->sched_props.job_timeout_ms); err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, - NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64, + NULL, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64, timeout, guc_to_gt(guc)->ordered_wq, NULL, q->name, gt_to_xe(q->gt)->drm.dev); if (err) diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 21a3daab0154..2fb628da5c43 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -76,6 +76,16 @@ static inline void xe_lrc_put(struct xe_lrc *lrc) kref_put(&lrc->refcount, xe_lrc_destroy); } +/** + * xe_lrc_ring_size() - Xe LRC ring size + * + * Return: Size of LRC ring buffer + */ +static inline size_t xe_lrc_ring_size(void) +{ + return SZ_16K; +} + size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class); u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); u32 xe_lrc_regs_offset(struct xe_lrc *lrc); -- cgit v1.2.3 From 1521fad9ad876238161fc25d91b7274a4a0b63d9 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:26 -0700 Subject: drm/xe/vf: Add debug prints for GuC replaying state during VF recovery Helpful to manually verify the GuC state machine can correctly replay the state during a VF post-migration recovery. All replay paths have been manually verified as triggered and working during testing. Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-29-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index b4c09604c5ea..e9aa0625ce60 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -2037,21 +2037,27 @@ void xe_guc_submit_stop(struct xe_guc *guc) } -static void guc_exec_queue_revert_pending_state_change(struct xe_exec_queue *q) +static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc, + struct xe_exec_queue *q) { bool pending_enable, pending_disable, pending_resume; pending_enable = exec_queue_pending_enable(q); pending_resume = exec_queue_pending_resume(q); - if (pending_enable && pending_resume) + if (pending_enable && pending_resume) { q->guc->needs_resume = true; + xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d", + q->guc->id); + } if (pending_enable && !pending_resume && !exec_queue_pending_tdr_exit(q)) { clear_exec_queue_registered(q); if (xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); + xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d", + q->guc->id); } if (pending_enable) { @@ -2059,6 +2065,8 @@ static void guc_exec_queue_revert_pending_state_change(struct xe_exec_queue *q) clear_exec_queue_pending_resume(q); clear_exec_queue_pending_tdr_exit(q); clear_exec_queue_pending_enable(q); + xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d", + q->guc->id); } if (exec_queue_destroyed(q) && exec_queue_registered(q)) { @@ -2068,6 +2076,8 @@ static void guc_exec_queue_revert_pending_state_change(struct xe_exec_queue *q) else q->guc->needs_cleanup = true; clear_exec_queue_extra_ref(q); + xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d", + q->guc->id); } pending_disable = exec_queue_pending_disable(q); @@ -2075,6 +2085,8 @@ static void guc_exec_queue_revert_pending_state_change(struct xe_exec_queue *q) if (pending_disable && exec_queue_suspended(q)) { clear_exec_queue_suspended(q); q->guc->needs_suspend = true; + xe_gt_dbg(guc_to_gt(guc), 
"Replay SUSPEND - guc_id=%d", + q->guc->id); } if (pending_disable) { @@ -2082,6 +2094,8 @@ static void guc_exec_queue_revert_pending_state_change(struct xe_exec_queue *q) set_exec_queue_enabled(q); clear_exec_queue_pending_disable(q); clear_exec_queue_check_timeout(q); + xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d", + q->guc->id); } q->guc->resume_time = 0; @@ -2107,7 +2121,7 @@ static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q) else cancel_delayed_work_sync(&sched->base.work_tdr); - guc_exec_queue_revert_pending_state_change(q); + guc_exec_queue_revert_pending_state_change(guc, q); if (xe_exec_queue_is_parallel(q)) { struct xe_device *xe = guc_to_xe(guc); @@ -2222,6 +2236,9 @@ static void guc_exec_queue_unpause_prepare(struct xe_guc *guc, list_for_each_entry(s_job, &sched->base.pending_list, list) { job = to_xe_sched_job(s_job); + xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d", + q->guc->id, xe_sched_job_seqno(job)); + q->ring_ops->emit_job(job); job->skip_emit = true; } -- cgit v1.2.3 From 3b56911960b3c938d2eed70526ef4bc496520123 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:27 -0700 Subject: drm/xe/vf: Workaround for race condition in GuC firmware during VF pause MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A race condition exists where a paused VF's H2G request can be processed and subsequently rejected. This rejection results in a FAST_REQ failure being delivered to the KMD, which then terminates the CT via a dead worker and triggers a GT reset—an undesirable outcome. This workaround mitigates the issue by checking if a VF post-migration recovery is in progress and aborting these adverse actions accordingly. The GuC firmware will address this bug in an upcoming release. Once that version is available and VF migration depends on it, this workaround can be safely removed. Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-30-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 3472e4ea2609..3ae1e8db143a 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1398,6 +1398,10 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) fast_req_report(ct, fence); + /* FIXME: W/A race in the GuC, will get in firmware soon */ + if (xe_gt_recovery_pending(gt)) + return 0; + CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE); return -EPROTO; -- cgit v1.2.3 From 673167d9f08378b7c619e332fe9eea230425d330 Mon Sep 17 00:00:00 2001 From: Satyanarayana K V P Date: Wed, 8 Oct 2025 14:45:28 -0700 Subject: drm/xe: Use PPGTT addresses for TLB invalidation to avoid GGTT fixups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The migrate VM builds the CCS metadata save/restore batch buffer (BB) in advance and retains it so the GuC can submit it directly when saving a VM’s state. When a VM migrates between VFs, the GGTT base can change. Any GGTT-based addresses embedded in the BB would then have to be parsed and patched. Use PPGTT addresses in the BB (including for TLB invalidation) so the BB remains GGTT-agnostic and requires no address fixups during migration. 
Signed-off-by: Satyanarayana K V P Signed-off-by: Matthew Brost Cc: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251008214532.3442967-31-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 1d667fa36cf3..ad03afb5145f 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -980,15 +980,27 @@ struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate) return migrate->q->lrc[0]; } -static int emit_flush_invalidate(struct xe_exec_queue *q, u32 *dw, int i, - u32 flags) +static u64 migrate_vm_ppgtt_addr_tlb_inval(void) { - struct xe_lrc *lrc = xe_exec_queue_lrc(q); + /* + * The migrate VM is self-referential so it can modify its own PTEs (see + * pte_update_size() or emit_pte() functions). We reserve NUM_KERNEL_PDE + * entries for kernel operations (copies, clears, CCS migrate), and + * suballocate the rest to user operations (binds/unbinds). With + * NUM_KERNEL_PDE = 15, NUM_KERNEL_PDE - 1 is already used for PTE updates, + * so assign NUM_KERNEL_PDE - 2 for TLB invalidation. + */ + return (NUM_KERNEL_PDE - 2) * XE_PAGE_SIZE; +} + +static int emit_flush_invalidate(u32 *dw, int i, u32 flags) +{ + u64 addr = migrate_vm_ppgtt_addr_tlb_inval(); + dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW | flags; - dw[i++] = lower_32_bits(xe_lrc_start_seqno_ggtt_addr(lrc)) | - MI_FLUSH_DW_USE_GTT; - dw[i++] = upper_32_bits(xe_lrc_start_seqno_ggtt_addr(lrc)); + dw[i++] = lower_32_bits(addr); + dw[i++] = upper_32_bits(addr); dw[i++] = MI_NOOP; dw[i++] = MI_NOOP; @@ -1101,11 +1113,11 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); - bb->len = emit_flush_invalidate(q, bb->cs, bb->len, flush_flags); + bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt, src_L0_ofs, dst_is_pltt, src_L0, ccs_ofs, true); - bb->len = emit_flush_invalidate(q, bb->cs, bb->len, flush_flags); + bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); size -= src_L0; } -- cgit v1.2.3 From efd38d619a03b2aadf124a4ddeb4054a447e2c8c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:29 -0700 Subject: drm/xe/vf: Use primary GT ordered work queue on media GT on PTL VF VF CCS restore is a primary GT operation on which the media GT depends. Therefore, it doesn't make much sense to run these operations in parallel. To address this, point the media GT's ordered work queue to the primary GT's ordered work queue on platforms that require (PTL VFs) CCS restore as part of VF post-migration recovery. 
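The effect can be sketched with stock workqueue semantics; the work item names below are hypothetical, and the driver actually uses the drmm-managed variant of the allocation:

	/*
	 * Illustration only: an ordered workqueue executes at most one work
	 * item at a time, in queueing order, so pointing both GTs at the same
	 * queue serializes their recovery workers.
	 */
	struct workqueue_struct *wq =
		alloc_ordered_workqueue("gt-ordered-wq", WQ_MEM_RECLAIM);

	queue_work(wq, &primary_gt_recovery_work); /* CCS restore runs here first */
	queue_work(wq, &media_gt_recovery_work);   /* starts only after the above */
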
v7: - Remove bool from xe_gt_alloc (Lucas) v9: - Fix typo (Lucas) Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251008214532.3442967-32-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_gt.c | 18 ++++++++++++++---- drivers/gpu/drm/xe/xe_pci.c | 2 ++ drivers/gpu/drm/xe/xe_pci_types.h | 1 + 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 54d4034659cb..9e3666a226da 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -329,6 +329,8 @@ struct xe_device { u8 skip_mtcfg:1; /** @info.skip_pcode: skip access to PCODE uC */ u8 skip_pcode:1; + /** @info.needs_shared_vf_gt_wq: needs shared GT WQ on VF */ + u8 needs_shared_vf_gt_wq:1; } info; /** @wa_active: keep track of active workarounds */ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 6951fedd4350..d8e94fb8b9bd 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -67,7 +67,11 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile) { - struct drm_device *drm = &tile_to_xe(tile)->drm; + struct xe_device *xe = tile_to_xe(tile); + struct drm_device *drm = &xe->drm; + bool shared_wq = xe->info.needs_shared_vf_gt_wq && tile->primary_gt && + IS_SRIOV_VF(xe); + struct workqueue_struct *ordered_wq; struct xe_gt *gt; gt = drmm_kzalloc(drm, sizeof(*gt), GFP_KERNEL); @@ -75,9 +79,15 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile) return ERR_PTR(-ENOMEM); gt->tile = tile; - gt->ordered_wq = drmm_alloc_ordered_workqueue(drm, "gt-ordered-wq", WQ_MEM_RECLAIM); - if (IS_ERR(gt->ordered_wq)) - return ERR_CAST(gt->ordered_wq); + if (shared_wq && tile->primary_gt->ordered_wq) + ordered_wq = tile->primary_gt->ordered_wq; + else + ordered_wq = drmm_alloc_ordered_workqueue(drm, "gt-ordered-wq", + WQ_MEM_RECLAIM); + if (IS_ERR(ordered_wq)) + return ERR_CAST(ordered_wq); + + gt->ordered_wq = ordered_wq; return gt; } diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index be91343829dd..687e919f8a8d 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -347,6 +347,7 @@ static const struct xe_device_desc ptl_desc = { .has_sriov = true, .max_gt_per_tile = 2, .needs_scratch = true, + .needs_shared_vf_gt_wq = true, }; #undef PLATFORM @@ -596,6 +597,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; xe->info.needs_scratch = desc->needs_scratch; + xe->info.needs_shared_vf_gt_wq = desc->needs_shared_vf_gt_wq; xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.probe_display && diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 9b9766a3baa3..b11bf6abda5b 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -48,6 +48,7 @@ struct xe_device_desc { u8 skip_guc_pc:1; u8 skip_mtcfg:1; u8 skip_pcode:1; + u8 needs_shared_vf_gt_wq:1; }; struct xe_graphics_desc { -- cgit v1.2.3 From a093570ecd2239ea19db5a158ae616f3dcbec048 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:30 -0700 Subject: drm/xe/vf: Ensure media GT VF recovery runs after primary GT on PTL It is possible that the media GT's VF post-migration recovery work item gets scheduled before the primary GT's work item. 
Since the media GT depends on the primary GT's work item to complete CCS restore, if the media GT's work item is scheduled first, detect this condition and re-queue the media GT's work item for a later time. v5: - Adjust debug message (Tomasz) Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://lore.kernel.org/r/20251008214532.3442967-33-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index edd98a4da306..b527f78da295 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -1107,8 +1107,22 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) pf_version->major, pf_version->minor); } -static void vf_post_migration_shutdown(struct xe_gt *gt) +static bool vf_post_migration_shutdown(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); + + /* + * On platforms where CCS must be restored by the primary GT, the media + * GT's VF post-migration recovery must run afterward. Detect this case + * and re-queue the media GT's restore work item if necessary. + */ + if (xe->info.needs_shared_vf_gt_wq && xe_gt_is_media_type(gt)) { + struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; + + if (xe_gt_sriov_vf_recovery_pending(primary_gt)) + return true; + } + spin_lock_irq(>->sriov.vf.migration.lock); gt->sriov.vf.migration.recovery_queued = false; spin_unlock_irq(>->sriov.vf.migration.lock); @@ -1116,6 +1130,8 @@ static void vf_post_migration_shutdown(struct xe_gt *gt) xe_guc_ct_flush_and_stop(>->uc.guc.ct); xe_guc_submit_pause(>->uc.guc); xe_tlb_inval_reset(>->tlb_inval); + + return false; } static size_t post_migration_scratch_size(struct xe_device *xe) @@ -1194,11 +1210,14 @@ static void vf_post_migration_recovery(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); int err; + bool retry; xe_gt_sriov_dbg(gt, "migration recovery in progress\n"); xe_pm_runtime_get(xe); - vf_post_migration_shutdown(gt); + retry = vf_post_migration_shutdown(gt); + if (retry) + goto queue; if (!xe_sriov_vf_migration_supported(xe)) { xe_gt_sriov_err(gt, "migration is not supported\n"); @@ -1226,6 +1245,12 @@ fail: xe_pm_runtime_put(xe); xe_gt_sriov_err(gt, "migration recovery failed (%pe)\n", ERR_PTR(err)); xe_device_declare_wedged(xe); + return; + +queue: + xe_gt_sriov_info(gt, "Re-queuing migration recovery\n"); + queue_work(gt->ordered_wq, >->sriov.vf.migration.worker); + xe_pm_runtime_put(xe); } static void migration_worker_func(struct work_struct *w) -- cgit v1.2.3 From 8f1e1e524cd8c0456edf2917c4296ad0260d7051 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 8 Oct 2025 14:45:31 -0700 Subject: drm/xe/vf: Rebase CCS save/restore BB GGTT addresses Rebase the CCS save/restore BB's GGTT addresses during VF post-migration recovery by setting the software ring tail to zero, the LRC ring head to zero, and rewriting the jump-to-BB instructions. 
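In essence, the rewritten jump looks like the following sketch (helper names as in the patch below; new_bb_addr stands in for the rebased GGTT address and is not a real variable in the driver):

	u32 dw[4], i = 0;
	u64 new_bb_addr = rebased_ggtt_addr;	/* assumed input */

	dw[i++] = MI_BATCH_BUFFER_START | XE_INSTR_NUM_DW(3);
	dw[i++] = lower_32_bits(new_bb_addr);	/* low dword of the BB address */
	dw[i++] = upper_32_bits(new_bb_addr);	/* high dword */
	dw[i++] = MI_NOOP;
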
Signed-off-by: Matthew Brost Reviewed-by: Satyanarayana K V P Link: https://lore.kernel.org/r/20251008214532.3442967-34-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 4 ++++ drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 28 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_vf_ccs.h | 1 + 3 files changed, 33 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index b527f78da295..46518e629ba3 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -34,6 +34,7 @@ #include "xe_pm.h" #include "xe_sriov.h" #include "xe_sriov_vf.h" +#include "xe_sriov_vf_ccs.h" #include "xe_tile_sriov_vf.h" #include "xe_tlb_inval.h" #include "xe_uc_fw.h" @@ -1149,6 +1150,9 @@ static int vf_post_migration_fixups(struct xe_gt *gt) if (err) return err; + if (xe_gt_is_main_type(gt)) + xe_sriov_vf_ccs_rebase(gt_to_xe(gt)); + xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt); err = xe_guc_contexts_hwsp_rebase(&gt->uc.guc, buf); if (err) diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c index 8dec616c37c9..790249801364 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c @@ -175,6 +175,15 @@ static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx) struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); u32 dw[10], i = 0; + /* + * XXX: Save/restore fixes — for some reason, the GuC only accepts the + * save/restore context if the LRC head pointer is zero. This is evident + * from repeated VF migrations failing when the LRC head pointer is + * non-zero. + */ + lrc->ring.tail = 0; + xe_lrc_set_ring_head(lrc, 0); + dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE; dw[i++] = MI_BATCH_BUFFER_START | XE_INSTR_NUM_DW(3); dw[i++] = lower_32_bits(addr); @@ -186,6 +195,25 @@ static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx) xe_lrc_set_ring_tail(lrc, lrc->ring.tail); } +/** + * xe_sriov_vf_ccs_rebase - Rebase GGTT addresses for CCS save / restore + * @xe: the &xe_device. + */ +void xe_sriov_vf_ccs_rebase(struct xe_device *xe) +{ + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + + if (!IS_VF_CCS_READY(xe)) + return; + + for_each_ccs_rw_ctx(ctx_id) { + struct xe_sriov_vf_ccs_ctx *ctx = + &xe->sriov.vf.ccs.contexts[ctx_id]; + + ccs_rw_update_ring(ctx); + } +} + static int register_save_restore_context(struct xe_sriov_vf_ccs_ctx *ctx) { int ctx_type; diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h index 0745c0ff0228..f8ca6efce9ee 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h @@ -18,6 +18,7 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe); int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo); int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo); int xe_sriov_vf_ccs_register_context(struct xe_device *xe); +void xe_sriov_vf_ccs_rebase(struct xe_device *xe); void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p); static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe) -- cgit v1.2.3 From 60e26675578f5a3d7375f5f80d2d70c33eca2ec7 Mon Sep 17 00:00:00 2001 From: Satyanarayana K V P Date: Wed, 8 Oct 2025 14:45:32 -0700 Subject: drm/xe/guc: Increase wait timeout to 2sec after BUSY reply from GuC Some VF2GUC actions may take longer to process. Increase the default timeout after a received BUSY indication to 2 sec to cover all worst-case scenarios. 
Signed-off-by: Satyanarayana K V P Signed-off-by: Matthew Brost Cc: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251008214532.3442967-35-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index d5adbbb013ec..d94490979adc 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1439,7 +1439,7 @@ timeout: BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1); ret = xe_mmio_wait32(mmio, reply_reg, resp_mask, resp_mask, - 1000000, &header, false); + 2000000, &header, false); if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != GUC_HXG_ORIGIN_GUC)) -- cgit v1.2.3 From e07e10ae83bdf429f59c8c149173a8c4f29c481e Mon Sep 17 00:00:00 2001 From: Ketil Johnsen Date: Wed, 8 Oct 2025 12:51:11 +0200 Subject: drm/panthor: Ensure MCU is disabled on suspend Currently the Panthor driver needs the GPU to be powered down between suspend and resume. If this is not done, then the MCU_CONTROL register will be preserved as AUTO, which again will cause a premature FW boot on resume. The FW will go directly into fatal state in this case. This case needs to be handled as there is no guarantee that the GPU will be powered down after the suspend callback on all platforms. The fix is to call panthor_fw_stop() in "pre-reset" path to ensure the MCU_CONTROL register is cleared (set DISABLE). This matches well with the already existing call to panthor_fw_start() from the "post-reset" path. Signed-off-by: Ketil Johnsen Acked-by: Boris Brezillon Reviewed-by: Steven Price Fixes: 2718d91816ee ("drm/panthor: Add the FW logical block") Signed-off-by: Steven Price Link: https://lore.kernel.org/r/20251008105112.4077015-1-ketil.johnsen@arm.com --- drivers/gpu/drm/panthor/panthor_fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c index 36f1034839c2..44a995835188 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.c +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -1099,6 +1099,7 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) } panthor_job_irq_suspend(&ptdev->fw->irq); + panthor_fw_stop(ptdev); } /** -- cgit v1.2.3 From 146046907b56578263434107f5a7d5051847c459 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 12 Sep 2025 15:31:45 -0700 Subject: drm/xe: Increase global invalidation timeout to 1000us The previous timeout of 500us seems to be too small; panning the map in the Roll20 VTT in Firefox on a KDE/Wayland desktop reliably triggered timeouts within a few seconds of usage, causing the monitor to freeze and the following to be printed to dmesg: [Jul30 13:44] xe 0000:03:00.0: [drm] *ERROR* GT0: Global invalidation timeout [Jul30 13:48] xe 0000:03:00.0: [drm] *ERROR* [CRTC:82:pipe A] flip_done timed out I haven't hit a single timeout since increasing it to 1000us even after several multi-hour testing sessions. 
Fixes: 0dd2dd0182bc ("drm/xe: Move DSB l2 flush to a more sensible place") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5710 Signed-off-by: Kenneth Graunke Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Reviewed-by: Shuicheng Lin Link: https://lore.kernel.org/r/20250912223254.147940-1-kenneth@whitecape.org Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index ab0302d5f25f..6f8f72fd1b13 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -1073,7 +1073,7 @@ void xe_device_l2_flush(struct xe_device *xe) spin_lock(&gt->global_invl_lock); xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1); - if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) + if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 1000, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); spin_unlock(&gt->global_invl_lock); -- cgit v1.2.3 From bb642e2d300ee27dcede65cda7ffc47a7047bd69 Mon Sep 17 00:00:00 2001 From: Amit Chaudhary Date: Fri, 26 Sep 2025 12:08:22 -0700 Subject: nvme-multipath: Skip nr_active increments in RETRY disposition For the queue-depth I/O policy, this patch fixes unbalanced I/Os across nvme multipaths. Issue Description: The RETRY disposition incorrectly increments the ns->ctrl->nr_active counter and reinitializes the iostat start-time. In such cases the nr_active counter never goes back to zero until that path disconnects and reconnects. Such a path is not chosen for new I/Os if multiple RETRY cases on a given path cause its queue-depth counter to be artificially high compared to other paths. This leads to unbalanced I/Os across paths. The patch skips incrementing nr_active if NVME_MPATH_CNT_ACTIVE is already set. And it skips restarting io stats if NVME_MPATH_IO_STATS is already set. 
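For context, the queue-depth policy steers each new I/O to the path with the smallest in-flight count, so a counter that never drains back to zero makes its path look permanently busy. A simplified stand-in for the selection loop (not the nvme code itself):

	#include <linux/atomic.h>
	#include <linux/kernel.h>

	struct demo_path {
		atomic_t nr_active;	/* in-flight I/Os on this path */
	};

	static struct demo_path *pick_least_busy(struct demo_path *paths, int n)
	{
		struct demo_path *best = NULL;
		int min_depth = INT_MAX, i;

		for (i = 0; i < n; i++) {
			/* a leaked increment inflates this depth forever */
			int depth = atomic_read(&paths[i].nr_active);

			if (depth < min_depth) {
				min_depth = depth;
				best = &paths[i];
			}
		}
		return best;
	}
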
base-commit: e989a3da2d371a4b6597ee8dee5c72e407b4db7a Fixes: d4d957b53d91eeb ("nvme-multipath: support io stats on the mpath device") Signed-off-by: Amit Chaudhary Reviewed-by: Randy Jennings Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 3da980dc60d9..543e17aead12 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -182,12 +182,14 @@ void nvme_mpath_start_request(struct request *rq) struct nvme_ns *ns = rq->q->queuedata; struct gendisk *disk = ns->head->disk; - if (READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) { + if ((READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) && + !(nvme_req(rq)->flags & NVME_MPATH_CNT_ACTIVE)) { atomic_inc(&ns->ctrl->nr_active); nvme_req(rq)->flags |= NVME_MPATH_CNT_ACTIVE; } - if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq)) + if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq) || + (nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) return; nvme_req(rq)->flags |= NVME_MPATH_IO_STATS; -- cgit v1.2.3 From 8b9ba8d6d95fe75fed6b0480bb03da4b321bea08 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Oct 2025 04:06:18 -0700 Subject: drm/xe: Don't allow evicting of BOs in same VM in array of VM binds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An array of VM binds can potentially evict other buffer objects (BOs) within the same VM under certain conditions, which may lead to NULL pointer dereferences later in the bind pipeline. To prevent this, clear the allow_res_evict flag in the xe_bo_validate call. v2: - Invert polarity of no_res_evict (Thomas) - Add comment in code explaining issue (Thomas) Cc: stable@vger.kernel.org Reported-by: Paulo Zanoni Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6268 Fixes: 774b5fa509a9 ("drm/xe: Avoid evicting object of the same vm in none fault mode") Fixes: 77f2ef3f16f5 ("drm/xe: Lock all gpuva ops during VM bind IOCTL") Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Tested-by: Paulo Zanoni Reviewed-by: Thomas Hellström Link: https://lore.kernel.org/r/20251009110618.3481870-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 32 +++++++++++++++++++++++--------- drivers/gpu/drm/xe/xe_vm_types.h | 1 + 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index faca626702b8..179758ca7cb8 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2858,7 +2858,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, } static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, - bool validate) + bool res_evict, bool validate) { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); @@ -2869,7 +2869,8 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, err = drm_exec_lock_obj(exec, &bo->ttm.base); if (!err && validate) err = xe_bo_validate(bo, vm, - !xe_vm_in_preempt_fence_mode(vm), exec); + !xe_vm_in_preempt_fence_mode(vm) && + res_evict, exec); } return err; @@ -2939,14 +2940,23 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) } static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, - struct xe_vma_op *op) + struct xe_vma_ops *vops, struct xe_vma_op *op) { int err = 0; + bool res_evict; + + /* + * We only allow 
evicting a BO within the VM if it is not part of an + * array of binds, as an array of binds can evict another BO within the + * bind. + */ + res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); switch (op->base.op) { case DRM_GPUVA_OP_MAP: if (!op->map.invalidate_on_bind) err = vma_lock_and_validate(exec, op->map.vma, + res_evict, !xe_vm_in_fault_mode(vm) || op->map.immediate); break; @@ -2957,11 +2967,13 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.remap.unmap->va), - false); + res_evict, false); if (!err && op->remap.prev) - err = vma_lock_and_validate(exec, op->remap.prev, true); + err = vma_lock_and_validate(exec, op->remap.prev, + res_evict, true); if (!err && op->remap.next) - err = vma_lock_and_validate(exec, op->remap.next, true); + err = vma_lock_and_validate(exec, op->remap.next, + res_evict, true); break; case DRM_GPUVA_OP_UNMAP: err = check_ufence(gpuva_to_vma(op->base.unmap.va)); @@ -2970,7 +2982,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.unmap.va), - false); + res_evict, false); break; case DRM_GPUVA_OP_PREFETCH: { @@ -2985,7 +2997,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.prefetch.va), - false); + res_evict, false); if (!err && !xe_vma_has_no_bo(vma)) err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region], @@ -3031,7 +3043,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, return err; list_for_each_entry(op, &vops->list, link) { - err = op_lock_and_prep(exec, vm, op); + err = op_lock_and_prep(exec, vm, vops, op); if (err) return err; } @@ -3664,6 +3676,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); + if (args->num_binds > 1) + vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS; for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index da39940501d8..413353e1c225 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -476,6 +476,7 @@ struct xe_vma_ops { /** @flag: signify the properties within xe_vma_ops*/ #define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0) #define XE_VMA_OPS_FLAG_MADVISE BIT(1) +#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2) u32 flags; #ifdef TEST_VM_OPS_ERROR /** @inject_error: inject error to test error handling */ -- cgit v1.2.3 From 812258ff4166bcd41c7d44707e0591f9ae32ac8c Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 8 Sep 2025 14:12:35 +0100 Subject: rust: cfi: only 64-bit arm and x86 support CFI_CLANG The kernel uses the standard rustc targets for non-x86 targets, and out of those only 64-bit arm's target has kcfi support enabled. For x86, the custom 64-bit target enables kcfi. The HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC config option that allows CFI_CLANG to be used in combination with RUST does not check whether the rustc target supports kcfi. This breaks the build on riscv (and presumably 32-bit arm) when CFI_CLANG and RUST are enabled at the same time. Ordinarily, a rustc-option check would be used to detect target support but unfortunately rustc-option filters out the target for reasons given in commit 46e24a545cdb4 ("rust: kasan/kbuild: fix missing flags on first build"). 
As a result, if the host supports kcfi but the target does not, e.g. when building for riscv on x86_64, the build would remain broken. Instead, make HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC depend on the only two architectures where the target used supports it to fix the build. CC: stable@vger.kernel.org Fixes: ca627e636551e ("rust: cfi: add support for CFI_CLANG with Rust") Signed-off-by: Conor Dooley Acked-by: Miguel Ojeda Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250908-distill-lint-1ae78bcf777c@spud Signed-off-by: Paul Walmsley --- arch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/Kconfig b/arch/Kconfig index ebe08b9186ad..74ff01133532 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -965,6 +965,7 @@ config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC def_bool y depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS depends on RUSTC_VERSION >= 107900 + depends on ARM64 || X86_64 # With GCOV/KASAN we need this fix: https://github.com/rust-lang/rust/pull/129373 depends on (RUSTC_LLVM_VERSION >= 190103 && RUSTC_VERSION >= 108200) || \ (!GCOV_KERNEL && !KASAN_GENERIC && !KASAN_SW_TAGS) -- cgit v1.2.3 From 781380d2cdef34559a0125ca6464b90bfc01594f Mon Sep 17 00:00:00 2001 From: Miquel Sabaté Solà Date: Mon, 15 Sep 2025 16:32:52 +0200 Subject: riscv: kgdb: Ensure that BUFMAX > NUMREGBYTES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current value of BUFMAX is similar to that in other architectures, but as per the documentation on KGDB (see 'Documentation/process/debugging/kgdb.rst'), BUFMAX has to be larger than NUMREGBYTES. Some architectures (e.g. powerpc or hexagon) actually define BUFMAX in relation to NUMREGBYTES, and thus this condition is always guaranteed. Since 2048 is a value that is generally accepted on all architectures, and that is larger than the current value of NUMREGBYTES, we can keep this value in arch/riscv, but we can at least add a 'static_assert' as an extra measure just in case NUMREGBYTES changes in the future for some unforeseen reason. Signed-off-by: Miquel Sabaté Solà Link: https://lore.kernel.org/r/20250915143252.154955-1-mikisabate@gmail.com Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/kgdb.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h index 7559d728c5ff..78b18e2fd771 100644 --- a/arch/riscv/include/asm/kgdb.h +++ b/arch/riscv/include/asm/kgdb.h @@ -3,14 +3,18 @@ #ifndef __ASM_KGDB_H_ #define __ASM_KGDB_H_ +#include <linux/build_bug.h> + #ifdef __KERNEL__ #define GDB_SIZEOF_REG sizeof(unsigned long) -#define DBG_MAX_REG_NUM (36) -#define NUMREGBYTES ((DBG_MAX_REG_NUM) * GDB_SIZEOF_REG) +#define DBG_MAX_REG_NUM 36 +#define NUMREGBYTES (DBG_MAX_REG_NUM * GDB_SIZEOF_REG) #define CACHE_FLUSH_IS_SAFE 1 #define BUFMAX 2048 +static_assert(BUFMAX > NUMREGBYTES, + "As per KGDB documentation, BUFMAX must be larger than NUMREGBYTES"); #ifdef CONFIG_RISCV_ISA_C #define BREAK_INSTR_SIZE 2 #else @@ -97,6 +101,7 @@ extern unsigned long kgdb_compiled_break; #define DBG_REG_STATUS_OFF 33 #define DBG_REG_BADADDR_OFF 34 #define DBG_REG_CAUSE_OFF 35 +/* NOTE: increase DBG_MAX_REG_NUM if you add more values here. 
*/ extern const char riscv_gdb_stub_feature[64]; -- cgit v1.2.3 From ae9e9f3d67dcef7582a4524047b01e33c5185ddb Mon Sep 17 00:00:00 2001 From: Danil Skrebenkov Date: Fri, 19 Sep 2025 16:28:46 +0300 Subject: RISC-V: clear hot-unplugged cores from all task mm_cpumasks to avoid rfence errors OpenSBI v1.7 adds hart checks for IPI operations. In particular, it compares the hmask passed as an argument from Linux against the mask of online harts on the OpenSBI side; if the two do not match, an error occurs. When a CPU goes offline, cpu_online_mask is explicitly cleared in __cpu_disable(). However, there is no explicit clearing of mm_cpumask. mm_cpumask is used for rfence operations that call the OpenSBI RFENCE extension, which sends IPIs to remote harts. If a hart is offline, an error may occur because the mask passed from Linux does not match the mask of online harts in OpenSBI. This patch adds explicit clearing of mm_cpumask for the offline hart. Signed-off-by: Danil Skrebenkov Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20250919132849.31676-1-danil.skrebenkov@cloudbear.ru [pjw@kernel.org: rewrote subject line for clarity] Signed-off-by: Paul Walmsley --- arch/riscv/kernel/cpu-hotplug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index a1e38ecfc8be..3f50d3dd76c6 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -54,6 +54,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) pr_notice("CPU%u: off\n", cpu); + clear_tasks_mm_cpumask(cpu); /* Verify from the firmware if the cpu is really stopped*/ if (cpu_ops->cpu_is_stopped) ret = cpu_ops->cpu_is_stopped(cpu); -- cgit v1.2.3 From c199745d3ac3f836515a5734a6ca5c6f55a8809b Mon Sep 17 00:00:00 2001 From: Florian Schmaus Date: Mon, 6 Oct 2025 11:37:42 +0200 Subject: riscv: entry: fix typo in comment 'instruciton' -> 'instruction' Fix a typo in a comment in the RISC-V entry.S. Signed-off-by: Florian Schmaus Link: https://lore.kernel.org/r/20251006093742.53925-1-flo@geekplace.eu [pjw@kernel.org: wrote a basic patch description] Signed-off-by: Paul Walmsley --- arch/riscv/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index d3d92a4becc7..9b9dec6893b8 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -455,7 +455,7 @@ SYM_DATA_START_LOCAL(excp_vect_table) RISCV_PTR do_trap_ecall_s RISCV_PTR do_trap_unknown RISCV_PTR do_trap_ecall_m - /* instruciton page fault */ + /* instruction page fault */ ALT_PAGE_FAULT(RISCV_PTR do_page_fault) RISCV_PTR do_page_fault /* load page fault */ RISCV_PTR do_trap_unknown -- cgit v1.2.3 From 9e68bd803fac49274fde914466fd3b07c4d602c8 Mon Sep 17 00:00:00 2001 From: Fabian Vogt Date: Wed, 10 Sep 2025 17:25:13 +0200 Subject: riscv: kprobes: Fix probe address validation When adding a kprobe such as "p:probe/tcp_sendmsg _text+15392192", arch_check_kprobe would start iterating all instructions starting from _text until the probed address. Not only is this very inefficient, but literal values in there (e.g. left by function patching) are misinterpreted in a way that causes a desync. Fix this by doing it like x86: start the iteration at the closest preceding symbol instead of the given starting point. 
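The essence of the fix, sketched in isolation (the actual change follows below): anchor the instruction scan at the symbol containing the probed address instead of at _text:

	unsigned long offset, scan_start;

	/* find the offset of addr within its containing symbol */
	if (!kallsyms_lookup_size_offset(addr, NULL, &offset))
		return false;	/* no symbol covers this address */

	scan_start = addr - offset;	/* decode forward from here up to addr */
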
Fixes: 87f48c7ccc73 ("riscv: kprobe: Fixup kernel panic when probing an illegal position") Signed-off-by: Fabian Vogt Signed-off-by: Marvin Friedrich Acked-by: Guo Ren Link: https://lore.kernel.org/r/6191817.lOV4Wx5bFT@fvogt-thinkpad Signed-off-by: Paul Walmsley --- arch/riscv/kernel/probes/kprobes.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index c0738d6c6498..8723390c7cad 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -49,10 +49,15 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) post_kprobe_handler(p, kcb, regs); } -static bool __kprobes arch_check_kprobe(struct kprobe *p) +static bool __kprobes arch_check_kprobe(unsigned long addr) { - unsigned long tmp = (unsigned long)p->addr - p->offset; - unsigned long addr = (unsigned long)p->addr; + unsigned long tmp, offset; + + /* start iterating at the closest preceding symbol */ + if (!kallsyms_lookup_size_offset(addr, NULL, &offset)) + return false; + + tmp = addr - offset; while (tmp <= addr) { if (tmp == addr) @@ -71,7 +76,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if ((unsigned long)insn & 0x1) return -EILSEQ; - if (!arch_check_kprobe(p)) + if (!arch_check_kprobe((unsigned long)p->addr)) return -EILSEQ; /* copy instruction */ -- cgit v1.2.3 From 69a8b62a7aa1e54ff7623064f6507fa29c1d0d4e Mon Sep 17 00:00:00 2001 From: Han Gao Date: Wed, 10 Sep 2025 19:24:01 +0800 Subject: riscv: acpi: avoid errors caused by probing DT devices when ACPI is used Similar to the ARM64 commit 3505f30fb6a9 ("ARM64 / ACPI: If we chose to boot from acpi then disable FDT"), let's not do DT hardware probing if ACPI is enabled in early boot. This avoids errors caused by repeated driver probing. Signed-off-by: Han Gao Link: https://lore.kernel.org/r/20250910112401.552987-1-rabenda.cn@gmail.com [pjw@kernel.org: cleaned up patch description and subject] Signed-off-by: Paul Walmsley --- arch/riscv/kernel/setup.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 14235e58c539..b5bc5fc65cea 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -331,11 +331,14 @@ void __init setup_arch(char **cmdline_p) /* Parse the ACPI tables for possible boot-time configuration */ acpi_boot_table_init(); + if (acpi_disabled) { #if IS_ENABLED(CONFIG_BUILTIN_DTB) - unflatten_and_copy_device_tree(); + unflatten_and_copy_device_tree(); #else - unflatten_device_tree(); + unflatten_device_tree(); #endif + } + misc_mem_init(); init_resources(); -- cgit v1.2.3 From 7882d2c45ccba538cddb0615a893a008dd2efcde Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 9 Oct 2025 11:18:48 +0200 Subject: riscv: Respect dependencies of ARCH_HAS_ELF_CORE_EFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This kconfig symbol has dependencies and is only selectable if those dependencies are also enabled. Respect the dependencies. 
Fixes the following warning when configuring an 'allnoconfig': WARNING: unmet direct dependencies detected for ARCH_HAS_ELF_CORE_EFLAGS Depends on [n]: BINFMT_ELF [=n] && ELF_CORE [=y] Selected by [y]: - RISCV [=y] Fixes: 8c94db0ae97c ("binfmt_elf: preserve original ELF e_flags for core dumps") Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20251009-riscv-elf-core-eflags-v1-1-e9b45ab6b36d@linutronix.de Signed-off-by: Paul Walmsley --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 0c6038dc5dfd..22cda9c452d2 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -29,7 +29,7 @@ config RISCV select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX - select ARCH_HAS_ELF_CORE_EFLAGS + select ARCH_HAS_ELF_CORE_EFLAGS if BINFMT_ELF && ELF_CORE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL -- cgit v1.2.3 From 45e33f220fd625492c11e15733d8e9b4f9db82a4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 18 Sep 2025 13:58:57 -0700 Subject: drm/xe: Move rebar to be done earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There may be cases in which the BAR0 also needs to move to accommodate the bigger BAR2. However, if it's not released, the BAR2 resize fails. During the vram probe it can't be released as it's already in use by xe_mmio for early register access. Add a new function in xe_vram and let xe_pci call it directly, even before early device probe. This allows the BAR2 to resize in cases where BAR0 also needs to move, assuming there aren't other reasons to hold that move: [] xe 0000:03:00.0: vgaarb: deactivate vga console [] xe 0000:03:00.0: [drm] Attempting to resize bar from 8192MiB -> 16384MiB [] xe 0000:03:00.0: BAR 0 [mem 0x83000000-0x83ffffff 64bit]: releasing [] xe 0000:03:00.0: BAR 2 [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] xe 0000:03:00.0: BAR 2 [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] xe 0000:03:00.0: BAR 0 [mem 0x83000000-0x83ffffff 64bit]: assigned [] pcieport 0000:00:01.0: PCI bridge to [bus 01-04] [] pcieport 0000:00:01.0: bridge window [mem 0x83000000-0x840fffff] [] pcieport 0000:00:01.0: bridge window [mem 0x4000000000-0x44007fffff 64bit pref] [] pcieport 0000:01:00.0: PCI bridge to [bus 02-04] [] pcieport 0000:01:00.0: bridge window [mem 0x83000000-0x840fffff] [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref] [] pcieport 0000:02:01.0: PCI bridge to [bus 03] [] pcieport 0000:02:01.0: bridge window [mem 0x83000000-0x83ffffff] [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref] [] xe 0000:03:00.0: [drm] BAR2 resized to 16384M [] xe 0000:03:00.0: [drm:xe_pci_probe [xe]] BATTLEMAGE e221:0000 dgfx:1 gfx:Xe2_HPG (20.02) ... For BMG there are additional fixes needed on the PCI side, but this helps get it to a working resize. All the rebar logic is more pci-specific than xe-specific and can be done very early in the probe sequence. 
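The PCI core sequence involved is roughly the following (a generic sketch of the resize flow, not the xe code; error handling omitted):

	#include <linux/pci.h>

	static int demo_resize_bar(struct pci_dev *pdev, int bar, int rebar_size)
	{
		int ret;

		/* the BAR (and anything blocking its move) must be unassigned first */
		pci_release_resource(pdev, bar);

		/* rebar_size is an encoded size value, not a byte count */
		ret = pci_resize_resource(pdev, bar, rebar_size);

		/* re-place everything that was released */
		pci_assign_unassigned_bus_resources(pdev->bus);
		return ret;
	}
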
In future it would be good to move it out of xe_vram.c, but this refactor is left for later. Cc: Ilpo Järvinen Cc: stable@vger.kernel.org # 6.12+ Link: https://lore.kernel.org/intel-xe/fafda2a3-fc63-ce97-d22b-803f771a4d19@linux.intel.com Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250918-xe-pci-rebar-2-v1-2-6c094702a074@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 2 ++ drivers/gpu/drm/xe/xe_vram.c | 34 ++++++++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_vram.h | 1 + 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 687e919f8a8d..e706b2808553 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -869,6 +869,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; + xe_vram_resize_bar(xe); + err = xe_device_probe_early(xe); /* * In Boot Survivability mode, no drm card is exposed and driver diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index 70bcbb188867..b62a96f8ef9e 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -27,15 +27,35 @@ #define BAR_SIZE_SHIFT 20 -static void -_resize_bar(struct xe_device *xe, int resno, resource_size_t size) +/* + * Release all the BARs that could influence/block LMEMBAR resizing, i.e. + * assigned IORESOURCE_MEM_64 BARs + */ +static void release_bars(struct pci_dev *pdev) +{ + struct resource *res; + int i; + + pci_dev_for_each_resource(pdev, res, i) { + /* Resource already un-assigned, do not reset it */ + if (!res->parent) + continue; + + /* No need to release unrelated BARs */ + if (!(res->flags & IORESOURCE_MEM_64)) + continue; + + pci_release_resource(pdev, i); + } +} + +static void resize_bar(struct xe_device *xe, int resno, resource_size_t size) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); int bar_size = pci_rebar_bytes_to_size(size); int ret; - if (pci_resource_len(pdev, resno)) - pci_release_resource(pdev, resno); + release_bars(pdev); ret = pci_resize_resource(pdev, resno, bar_size); if (ret) { @@ -51,7 +71,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) * if force_vram_bar_size is set, attempt to set to the requested size * else set to maximum possible size */ -static void resize_vram_bar(struct xe_device *xe) +void xe_vram_resize_bar(struct xe_device *xe) { int force_vram_bar_size = xe_modparam.force_vram_bar_size; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -120,7 +140,7 @@ static void resize_vram_bar(struct xe_device *xe) pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); - _resize_bar(xe, LMEM_BAR, rebar_size); + resize_bar(xe, LMEM_BAR, rebar_size); pci_assign_unassigned_bus_resources(pdev->bus); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); @@ -149,8 +169,6 @@ static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region * return -ENXIO; } - resize_vram_bar(xe); - lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR); lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR); if (!lmem_bar->io_size) diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h index 72860f714fc6..13505cfb184d 100644 --- a/drivers/gpu/drm/xe/xe_vram.h +++ b/drivers/gpu/drm/xe/xe_vram.h @@ -11,6 +11,7 @@ struct xe_device; struct xe_vram_region; +void xe_vram_resize_bar(struct xe_device *xe); int xe_vram_probe(struct xe_device *xe); struct xe_vram_region 
*xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement); -- cgit v1.2.3 From f3426ac54c42c3260096ddc50b5470eb179fb06a Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 8 Oct 2025 16:14:18 +0200 Subject: dpll: zl3073x: Increase maximum size of flash utility Newer firmware bundles contain a flash utility whose size exceeds the currently allowed limit. Increase the maximum allowed size to accommodate the newer utility version. Without this patch: # devlink dev flash i2c/1-0070 file fw_nosplit_v3.hex Failed to load firmware Flashing failed Error: zl3073x: FW load failed: [utility] component is too big (11000 bytes) Fixes: ca017409da694 ("dpll: zl3073x: Add firmware loading functionality") Suggested-by: Prathosh Satish Signed-off-by: Ivan Vecera Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251008141418.841053-1-ivecera@redhat.com Signed-off-by: Paolo Abeni --- drivers/dpll/zl3073x/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dpll/zl3073x/fw.c b/drivers/dpll/zl3073x/fw.c index d5418ff74886..def37fe8d9b0 100644 --- a/drivers/dpll/zl3073x/fw.c +++ b/drivers/dpll/zl3073x/fw.c @@ -37,7 +37,7 @@ struct zl3073x_fw_component_info { static const struct zl3073x_fw_component_info component_info[] = { [ZL_FW_COMPONENT_UTIL] = { .name = "utility", - .max_size = 0x2300, + .max_size = 0x4000, .load_addr = 0x20000000, .flash_type = ZL3073X_FLASH_TYPE_NONE, }, -- cgit v1.2.3 From 381f1ed15159c4b3f00dd37cc70924dedebeb111 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 10 Sep 2025 18:09:39 +0200 Subject: drm/xe/migrate: Fix an error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The exhaustive eviction accidentally changed an error path goto to a return. Fix this. Fixes: 59eabff2a352 ("drm/xe: Convert xe_bo_create_pin_map() for exhaustive eviction") Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Francois Dugast Link: https://lore.kernel.org/r/20250910160939.103473-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index ad03afb5145f..7345a5b65169 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -434,7 +434,7 @@ int xe_migrate_init(struct xe_migrate *m) err = xe_migrate_lock_prepare_vm(tile, m, vm); if (err) - return err; + goto err_out; if (xe->info.has_usm) { struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt, -- cgit v1.2.3 From 4dd5b5ac089bb6ea719b7ffb748707ac9cbce4e4 Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Wed, 8 Oct 2025 14:44:17 +0200 Subject: Revert "fs: make vfs_fileattr_[get|set] return -EOPNOTSUPP" This reverts commit 474b155adf3927d2c944423045757b54aa1ca4de. This patch caused a regression in ioctl_setflags(). Underlying filesystems use EOPNOTSUPP to indicate that a flag is not supported. This error also gets converted in ioctl_setflags(). Therefore, for unsupported flags the error changed from EOPNOTSUPP to ENOIOCTLCMD. 
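The error-code contract being restored can be pictured like this (a sketch only; the wrapper function name is invented, the mapping is the point):

	/*
	 * After the revert: the fileattr helpers report "no fileattr support"
	 * as -ENOIOCTLCMD again, which the ioctl dispatch turns into -ENOTTY
	 * for userspace, leaving -EOPNOTSUPP free to mean "this particular
	 * flag is not supported by the filesystem".
	 */
	static long demo_getflags(struct file *file, unsigned int __user *argp)
	{
		int err = ioctl_getflags(file, argp);

		if (err == -ENOIOCTLCMD)
			err = -ENOTTY;
		return err;
	}
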
Link: https://lore.kernel.org/linux-xfs/a622643f-1585-40b0-9441-cf7ece176e83@kernel.org/ Signed-off-by: Andrey Albershteyn Signed-off-by: Christian Brauner --- fs/file_attr.c | 12 ++---------- fs/fuse/ioctl.c | 4 ---- fs/overlayfs/copy_up.c | 2 +- fs/overlayfs/inode.c | 5 ++++- 4 files changed, 7 insertions(+), 16 deletions(-) diff --git a/fs/file_attr.c b/fs/file_attr.c index 12424d4945d0..460b2dd21a85 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -84,7 +84,7 @@ int vfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) int error; if (!inode->i_op->fileattr_get) - return -EOPNOTSUPP; + return -ENOIOCTLCMD; error = security_inode_file_getattr(dentry, fa); if (error) @@ -270,7 +270,7 @@ int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, int err; if (!inode->i_op->fileattr_set) - return -EOPNOTSUPP; + return -ENOIOCTLCMD; if (!inode_owner_or_capable(idmap, inode)) return -EPERM; @@ -312,8 +312,6 @@ int ioctl_getflags(struct file *file, unsigned int __user *argp) int err; err = vfs_fileattr_get(file->f_path.dentry, &fa); - if (err == -EOPNOTSUPP) - err = -ENOIOCTLCMD; if (!err) err = put_user(fa.flags, argp); return err; @@ -335,8 +333,6 @@ int ioctl_setflags(struct file *file, unsigned int __user *argp) fileattr_fill_flags(&fa, flags); err = vfs_fileattr_set(idmap, dentry, &fa); mnt_drop_write_file(file); - if (err == -EOPNOTSUPP) - err = -ENOIOCTLCMD; } } return err; @@ -349,8 +345,6 @@ int ioctl_fsgetxattr(struct file *file, void __user *argp) int err; err = vfs_fileattr_get(file->f_path.dentry, &fa); - if (err == -EOPNOTSUPP) - err = -ENOIOCTLCMD; if (!err) err = copy_fsxattr_to_user(&fa, argp); @@ -371,8 +365,6 @@ int ioctl_fssetxattr(struct file *file, void __user *argp) if (!err) { err = vfs_fileattr_set(idmap, dentry, &fa); mnt_drop_write_file(file); - if (err == -EOPNOTSUPP) - err = -ENOIOCTLCMD; } } return err; diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c index 57032eadca6c..fdc175e93f74 100644 --- a/fs/fuse/ioctl.c +++ b/fs/fuse/ioctl.c @@ -536,8 +536,6 @@ int fuse_fileattr_get(struct dentry *dentry, struct file_kattr *fa) cleanup: fuse_priv_ioctl_cleanup(inode, ff); - if (err == -ENOTTY) - err = -EOPNOTSUPP; return err; } @@ -574,7 +572,5 @@ int fuse_fileattr_set(struct mnt_idmap *idmap, cleanup: fuse_priv_ioctl_cleanup(inode, ff); - if (err == -ENOTTY) - err = -EOPNOTSUPP; return err; } diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index aac7e34f56c1..604a82acd164 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -178,7 +178,7 @@ static int ovl_copy_fileattr(struct inode *inode, const struct path *old, err = ovl_real_fileattr_get(old, &oldfa); if (err) { /* Ntfs-3g returns -EINVAL for "no fileattr support" */ - if (err == -EOPNOTSUPP || err == -EINVAL) + if (err == -ENOTTY || err == -EINVAL) return 0; pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n", old->dentry, err); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index aaa4cf579561..e11f310ce092 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -720,7 +720,10 @@ int ovl_real_fileattr_get(const struct path *realpath, struct file_kattr *fa) if (err) return err; - return vfs_fileattr_get(realpath->dentry, fa); + err = vfs_fileattr_get(realpath->dentry, fa); + if (err == -ENOIOCTLCMD) + err = -ENOTTY; + return err; } int ovl_fileattr_get(struct dentry *dentry, struct file_kattr *fa) -- cgit v1.2.3 From d90ad28e8aa482e397150e22f3762173d918a724 Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Wed, 8 Oct 2025 14:44:18 +0200 
Subject: fs: return EOPNOTSUPP from file_setattr/file_getattr syscalls These syscalls call into the vfs_fileattr_get/set functions, which return ENOIOCTLCMD if the filesystem doesn't support setting file attributes on an inode. For syscalls, EOPNOTSUPP would be a more appropriate return error. Signed-off-by: Andrey Albershteyn Reviewed-by: Jan Kara Reviewed-by: Arnd Bergmann Signed-off-by: Christian Brauner --- fs/file_attr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/file_attr.c b/fs/file_attr.c index 460b2dd21a85..1dcec88c0680 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -416,6 +416,8 @@ SYSCALL_DEFINE5(file_getattr, int, dfd, const char __user *, filename, } error = vfs_fileattr_get(filepath.dentry, &fa); + if (error == -ENOIOCTLCMD || error == -ENOTTY) + error = -EOPNOTSUPP; if (error) return error; @@ -483,6 +485,8 @@ SYSCALL_DEFINE5(file_setattr, int, dfd, const char __user *, filename, if (!error) { error = vfs_fileattr_set(mnt_idmap(filepath.mnt), filepath.dentry, &fa); + if (error == -ENOIOCTLCMD || error == -ENOTTY) + error = -EOPNOTSUPP; mnt_drop_write(filepath.mnt); } -- cgit v1.2.3 From 7933a585d70ee496fa341b50b8b0a95b131867ff Mon Sep 17 00:00:00 2001 From: Seong-Gwang Heo Date: Thu, 9 Oct 2025 13:41:48 +0800 Subject: ovl: remove redundant IOCB_DIO_CALLER_COMP clearing The backing_file_write_iter() function, which is called immediately after this code, already contains identical logic to clear the IOCB_DIO_CALLER_COMP flag along with the same explanatory comment. There is no need to duplicate this operation in the overlayfs code. Signed-off-by: Seong-Gwang Heo Fixes: a6293b3e285c ("fs: factor out backing_file_{read,write}_iter() helpers") Acked-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/overlayfs/file.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index fc52c796061d..7ab2c9daffd0 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -369,11 +369,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) if (!ovl_should_sync(OVL_FS(inode->i_sb))) ifl &= ~(IOCB_DSYNC | IOCB_SYNC); - /* - * Overlayfs doesn't support deferred completions, don't copy - * this property in case it is set by the issuer. - */ - ifl &= ~IOCB_DIO_CALLER_COMP; ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx); out_unlock: -- cgit v1.2.3 From 75188605c56d10c1bd3b1cd94f4872f349c3a9c8 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Oct 2025 06:06:29 -0700 Subject: drm/xe: Handle mixed mappings and existing VRAM on atomic faults Moving to VRAM will fail if mixed mappings are present or if the page is already located in VRAM. Atomic faults that require a move to VRAM currently retry without attempting to evict mixed mappings or locate existing VRAM mappings. This patch fixes the issue by attempting to evict mixed mappings or find existing VRAM pages when a move to VRAM fails during atomic fault handling. 
Fixes: a9ac0fa455b0 ("drm/xe: Strict migration policy for atomic SVM faults") Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20251009130629.3531962-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_svm.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 7e2db71ff34e..b268ee0d2271 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -1073,7 +1073,17 @@ retry: drm_dbg(&vm->xe->drm, "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n", vm->usm.asid, ERR_PTR(err)); - goto retry; + + /* + * In the devmem-only case, mixed mappings may + * be found. The get_pages function will fix + * these up to a single location, allowing the + * page fault handler to make forward progress. + */ + if (ctx.devmem_only) + goto get_pages; + else + goto retry; } else { drm_err(&vm->xe->drm, "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n", @@ -1083,6 +1093,7 @@ retry: } } +get_pages: get_pages_start = xe_svm_stats_ktime_get(); range_debug(range, "GET PAGES"); -- cgit v1.2.3 From 4b47a8601b71ad98833b447d465592d847b4dc77 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Sep 2025 11:12:06 -0400 Subject: NFSD: Define a proc_layoutcommit for the FlexFiles layout type Avoid a crash if a pNFS client should happen to send a LAYOUTCOMMIT operation on a FlexFiles layout. Reported-by: Robert Morris Closes: https://lore.kernel.org/linux-nfs/152f99b2-ba35-4dec-93a9-4690e625dccd@oracle.com/T/#t Cc: Thomas Haynes Cc: stable@vger.kernel.org Fixes: 9b9960a0ca47 ("nfsd: Add a super simple flex file server") Signed-off-by: Chuck Lever --- fs/nfsd/flexfilelayout.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c index c318cf74e388..0f1a35400cd5 100644 --- a/fs/nfsd/flexfilelayout.c +++ b/fs/nfsd/flexfilelayout.c @@ -125,6 +125,13 @@ nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp, return 0; } +static __be32 +nfsd4_ff_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp, + struct nfsd4_layoutcommit *lcp) +{ + return nfs_ok; +} + const struct nfsd4_layout_ops ff_layout_ops = { .notify_types = NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, @@ -133,4 +140,5 @@ const struct nfsd4_layout_ops ff_layout_ops = { .encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo, .proc_layoutget = nfsd4_ff_proc_layoutget, .encode_layoutget = nfsd4_ff_encode_layoutget, + .proc_layoutcommit = nfsd4_ff_proc_layoutcommit, }; -- cgit v1.2.3 From 3c652c3a71de1d30d72dc82c3bead8deb48eb749 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Tue, 16 Sep 2025 17:33:36 +0800 Subject: jbd2: ensure that all ongoing I/O complete before freeing blocks When releasing file system metadata blocks in jbd2_journal_forget(), if this buffer has not yet been checkpointed, it may already have been written back, be in the process of being written back, or not yet have been written back. jbd2_journal_forget() calls jbd2_journal_try_remove_checkpoint() to check the buffer's status and add it to the current transaction if it has not been written back. This buffer can only be reallocated after the transaction is committed. jbd2_journal_try_remove_checkpoint() attempts to lock the buffer and check its dirty status while holding the buffer lock. If the buffer has already been written back, everything proceeds normally. However, there are two issues. 
First, the function returns immediately if the buffer is locked by the write-back process. It does not wait for the write-back to complete. Consequently, until the current transaction is committed and the block is reallocated, there is no guarantee that the I/O will complete. This means that ongoing I/O could write stale metadata to the newly allocated block, potentially corrupting data. Second, the function unlocks the buffer as soon as it detects that the buffer is still dirty. If a concurrent write-back occurs immediately after this unlocking and before clear_buffer_dirty() is called in jbd2_journal_forget(), data corruption can theoretically still occur. Although these two issues are unlikely to occur in practice since the ongoing metadata writeback I/O does not take this long to complete, it's better to explicitly ensure that all ongoing I/O operations are completed. Fixes: 597599268e3b ("jbd2: discard dirty data when forgetting an un-journalled buffer") Cc: stable@kernel.org Suggested-by: Jan Kara Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Message-ID: <20250916093337.3161016-2-yi.zhang@huaweicloud.com> Signed-off-by: Theodore Ts'o --- fs/jbd2/transaction.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index c7867139af69..3e510564de6e 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1659,6 +1659,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh) int drop_reserve = 0; int err = 0; int was_modified = 0; + int wait_for_writeback = 0; if (is_handle_aborted(handle)) return -EROFS; @@ -1782,18 +1783,22 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh) } /* - * The buffer is still not written to disk, we should - * attach this buffer to current transaction so that the - * buffer can be checkpointed only after the current - * transaction commits. + * The buffer has not yet been written to disk. We should + * either clear the buffer or ensure that the ongoing I/O + * is completed, and attach this buffer to current + * transaction so that the buffer can be checkpointed only + * after the current transaction commits. */ clear_buffer_dirty(bh); + wait_for_writeback = 1; __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); spin_unlock(&journal->j_list_lock); } drop: __brelse(bh); spin_unlock(&jh->b_state_lock); + if (wait_for_writeback) + wait_on_buffer(bh); jbd2_journal_put_journal_head(jh); if (drop_reserve) { /* no need to reserve log space for this block -bzzz */ -- cgit v1.2.3 From 328a782cb138029182e521c08f50eb1587db955d Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Tue, 16 Sep 2025 17:33:37 +0800 Subject: ext4: wait for ongoing I/O to complete before freeing blocks When freeing metadata blocks in nojournal mode, ext4_forget() calls bforget() to clear the dirty flag on the buffer_head and remove associated mappings. This is acceptable if the metadata has not yet begun to be written back. However, if the write-back has already started but is not yet completed, ext4_forget() will have no effect. Subsequently, ext4_mb_clear_bb() will immediately return the block to the mb allocator. This block can then be reallocated immediately, potentially causing a data corruption issue. Fix this by clearing the buffer's dirty flag and waiting for the ongoing I/O to complete, ensuring that no further writes to stale data will occur. 
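Both fixes converge on the same buffer_head idiom, sketched here in isolation (not the exact patch text):

	#include <linux/buffer_head.h>

	/* Make sure a metadata buffer is safe to free and reallocate. */
	static void demo_forget_buffer(struct buffer_head *bh)
	{
		clear_buffer_dirty(bh);	/* no new writeback will start */
		wait_on_buffer(bh);	/* any in-flight writeback finishes */
		__bforget(bh);		/* drop the buffer and its mapping */
	}
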
Fixes: 16e08b14a455 ("ext4: cleanup clean_bdev_aliases() calls") Cc: stable@kernel.org Reported-by: Gao Xiang Closes: https://lore.kernel.org/linux-ext4/a9417096-9549-4441-9878-b1955b899b4e@huaweicloud.com/ Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Message-ID: <20250916093337.3161016-3-yi.zhang@huaweicloud.com> Signed-off-by: Theodore Ts'o --- fs/ext4/ext4_jbd2.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index b3e9b7bd7978..a0e66bc10093 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -280,9 +280,16 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle, bh, is_metadata, inode->i_mode, test_opt(inode->i_sb, DATA_FLAGS)); - /* In the no journal case, we can just do a bforget and return */ + /* + * In the no journal case, we should wait for the ongoing buffer + * to complete and do a forget. + */ if (!ext4_handle_valid(handle)) { - bforget(bh); + if (bh) { + clear_buffer_dirty(bh); + wait_on_buffer(bh); + __bforget(bh); + } return 0; } -- cgit v1.2.3 From 4b471b736ea1ce08113a12bd7dcdaea621b0f65f Mon Sep 17 00:00:00 2001 From: Zeno Endemann Date: Thu, 25 Sep 2025 17:24:33 +0200 Subject: ext4, doc: fix and improve directory hash tree description Some of the details about how directory hash trees work were confusing or outright wrong; this patch should fix those. A note on dx_tail's dt_reserved member: as far as I can tell, the kernel never sets this explicitly, so its content is apparently left-overs from what was there before (for the dx_root I've seen remnants of an ext4_dir_entry_tail struct from when the dir was not yet a hash dir). Signed-off-by: Zeno Endemann Message-ID: <20250925152435.22749-1-zeno.endemann@mailbox.org> Signed-off-by: Theodore Ts'o --- Documentation/filesystems/ext4/directory.rst | 63 ++++++++++++++-------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/Documentation/filesystems/ext4/directory.rst b/Documentation/filesystems/ext4/directory.rst index 6eece8e31df8..9b003a4d453f 100644 --- a/Documentation/filesystems/ext4/directory.rst +++ b/Documentation/filesystems/ext4/directory.rst @@ -183,10 +183,10 @@ in the place where the name normally goes. The structure is - det_checksum - Directory leaf block checksum. -The leaf directory block checksum is calculated against the FS UUID, the -directory's inode number, the directory's inode generation number, and -the entire directory entry block up to (but not including) the fake -directory entry. +The leaf directory block checksum is calculated against the FS UUID (or +the checksum seed, if that feature is enabled for the fs), the directory's +inode number, the directory's inode generation number, and the entire +directory entry block up to (but not including) the fake directory entry. Hash Tree Directories ~~~~~~~~~~~~~~~~~~~~~ @@ -196,12 +196,12 @@ new feature was added to ext3 to provide a faster (but peculiar) balanced tree keyed off a hash of the directory entry name. If the EXT4_INDEX_FL (0x1000) flag is set in the inode, this directory uses a hashed btree (htree) to organize and find directory entries. For -backwards read-only compatibility with ext2, this tree is actually -hidden inside the directory file, masquerading as “empty” directory data -blocks! 
It was stated previously that the end of the linear directory -entry table was signified with an entry pointing to inode 0; this is -(ab)used to fool the old linear-scan algorithm into thinking that the -rest of the directory block is empty so that it moves on. +backwards read-only compatibility with ext2, interior tree nodes are actually +hidden inside the directory file, masquerading as “empty” directory entries +spanning the whole block. It was stated previously that directory entries +with the inode set to 0 are treated as unused entries; this is (ab)used to +fool the old linear-scan algorithm into skipping over those blocks containing +the interior tree node data. The root of the tree always lives in the first data block of the directory. By ext2 custom, the '.' and '..' entries must appear at the @@ -209,24 +209,24 @@ beginning of this first block, so they are put here as two ``struct ext4_dir_entry_2`` s and not stored in the tree. The rest of the root node contains metadata about the tree and finally a hash->block map to find nodes that are lower in the htree. If -``dx_root.info.indirect_levels`` is non-zero then the htree has two -levels; the data block pointed to by the root node's map is an interior -node, which is indexed by a minor hash. Interior nodes in this tree -contains a zeroed out ``struct ext4_dir_entry_2`` followed by a -minor_hash->block map to find leafe nodes. Leaf nodes contain a linear -array of all ``struct ext4_dir_entry_2``; all of these entries -(presumably) hash to the same value. If there is an overflow, the -entries simply overflow into the next leaf node, and the -least-significant bit of the hash (in the interior node map) that gets -us to this next leaf node is set. - -To traverse the directory as a htree, the code calculates the hash of -the desired file name and uses it to find the corresponding block -number. If the tree is flat, the block is a linear array of directory -entries that can be searched; otherwise, the minor hash of the file name -is computed and used against this second block to find the corresponding -third block number. That third block number will be a linear array of -directory entries. +``dx_root.info.indirect_levels`` is non-zero then the htree has that many +levels and the blocks pointed to by the root node's map are interior nodes. +These interior nodes have a zeroed out ``struct ext4_dir_entry_2`` followed by +a hash->block map to find nodes of the next level. Leaf nodes look like +classic linear directory blocks, but all of its entries have a hash value +equal or greater than the indicated hash of the parent node. + +The actual hash value for an entry name is only 31 bits, the least-significant +bit is set to 0. However, if there is a hash collision between directory +entries, the least-significant bit may get set to 1 on interior nodes in the +case where these two (or more) hash-colliding entries do not fit into one leaf +node and must be split across multiple nodes. + +To look up a name in such a htree, the code calculates the hash of the desired +file name and uses it to find the leaf node with the range of hash values the +calculated hash falls into (in other words, a lookup works basically the same +as it would in a B-Tree keyed by the hash value), and possibly also scanning +the leaf nodes that follow (in tree order) in case of hash collisions. To traverse the directory as a linear array (such as the old code does), the code simply reads every data block in the directory. 
The blocks used @@ -319,7 +319,8 @@ of a data block: * - 0x24 - __le32 - block - - The block number (within the directory file) that goes with hash=0. + - The block number (within the directory file) that leads to the left-most + leaf node, i.e. the leaf containing entries with the lowest hash values. * - 0x28 - struct dx_entry - entries[0] @@ -442,7 +443,7 @@ The dx_tail structure is 8 bytes long and looks like this: * - 0x0 - u32 - dt_reserved - - Zero. + - Unused (but curiously still part of the checksum). * - 0x4 - __le32 - dt_checksum @@ -450,4 +451,4 @@ The dx_tail structure is 8 bytes long and looks like this: The checksum is calculated against the FS UUID, the htree index header (dx_root or dx_node), all of the htree indices (dx_entry) that are in -use, and the tail block (dx_tail). +use, and the tail block (dx_tail) with the dt_checksum initially set to 0. -- cgit v1.2.3 From 4cbc08649a54c3d533df9832342d52d409dfbbf0 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 9 Oct 2025 18:10:47 -0700 Subject: drm/xe: Enable media sampler power gating Where applicable, enable media sampler power gating. Also, add it to the powergate_info debugfs. v2: Remove the sampler powergate status since it is cleared quickly anyway. v3: Use vcs mask (Rodrigo) and fix the version check for media v4: Remove extra spaces v5: Media samplers are independent of vcs mask, use Media version 1255 (Matt Roper) Fixes: 38e8c4184ea0 ("drm/xe: Enable Coarse Power Gating") Cc: Rodrigo Vivi Cc: Matt Roper Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Link: https://lore.kernel.org/r/20251010011047.2047584-1-vinay.belgaumkar@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_gt_idle.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 06cb6b02ec64..51f2a03847f9 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -342,6 +342,7 @@ #define POWERGATE_ENABLE XE_REG(0xa210) #define RENDER_POWERGATE_ENABLE REG_BIT(0) #define MEDIA_POWERGATE_ENABLE REG_BIT(1) +#define MEDIA_SAMPLERS_POWERGATE_ENABLE REG_BIT(2) #define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n)) #define VDN_MFXVDENC_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n)) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index f8950a52d0a4..bdc9d9877ec4 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -124,6 +124,9 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) if (xe_gt_is_main_type(gt)) gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; + if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255) + gtidle->powergate_enable |= MEDIA_SAMPLERS_POWERGATE_ENABLE; + if (xe->info.platform != XE_DG1) { for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { if ((gt->info.engine_mask & BIT(i))) @@ -246,6 +249,11 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n, str_up_down(pg_status & media_slices[n].status_bit)); } + + if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255) + drm_printf(p, "Media Samplers Power Gating Enabled: %s\n", + str_yes_no(pg_enabled & MEDIA_SAMPLERS_POWERGATE_ENABLE)); + return 0; } -- cgit v1.2.3 From f85d4062bc6894747495136e7fc020a28a875a15 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 9 Oct 2025 23:02:28 +0000 Subject: drm/xe: Fix copyright and function naming in xe_ttm_sys_mgr
- Correct Red Hat copyright year from "2002" to "2022". - Rename ttm_sys_mgr_fini() to xe_ttm_sys_mgr_fini() to avoid confusion with generic TTM helpers. No functional changes intended. Reviewed-by: Rodrigo Vivi Signed-off-by: Shuicheng Lin Link: https://lore.kernel.org/r/20251009230239.2830207-6-shuicheng.lin@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c index d38b91872da3..3e404eb8d098 100644 --- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: MIT /* * Copyright © 2021-2022 Intel Corporation - * Copyright (C) 2021-2002 Red Hat + * Copyright (C) 2021-2022 Red Hat */ #include "xe_ttm_sys_mgr.h" @@ -85,7 +85,7 @@ static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = { .debug = xe_ttm_sys_mgr_debug }; -static void ttm_sys_mgr_fini(struct drm_device *drm, void *arg) +static void xe_ttm_sys_mgr_fini(struct drm_device *drm, void *arg) { struct xe_device *xe = (struct xe_device *)arg; struct ttm_resource_manager *man = &xe->mem.sys_mgr; @@ -116,5 +116,5 @@ int xe_ttm_sys_mgr_init(struct xe_device *xe) ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man); ttm_resource_manager_set_used(man, true); - return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe); + return drmm_add_action_or_reset(&xe->drm, xe_ttm_sys_mgr_fini, xe); } -- cgit v1.2.3 From 0145a99eacae7b67baba62a1036ef7c4b5e90a7a Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 9 Oct 2025 23:02:29 +0000 Subject: drm/xe: Fix copyright in xe_ttm_stolen_mgr - Correct Red Hat copyright year from "2002" to "2022". - Sort the include lines alphabetically. No functional changes intended. Reviewed-by: Tejas Upadhyay Cc: Rodrigo Vivi Signed-off-by: Shuicheng Lin Link: https://lore.kernel.org/r/20251009230239.2830207-7-shuicheng.lin@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index dc588255674d..9b00bdc8ef1a 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: MIT /* * Copyright © 2021-2023 Intel Corporation - * Copyright (C) 2021-2002 Red Hat + * Copyright (C) 2021-2022 Red Hat */ #include @@ -24,8 +24,8 @@ #include "xe_sriov.h" #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_vram_mgr.h" -#include "xe_wa.h" #include "xe_vram.h" +#include "xe_wa.h" struct xe_ttm_stolen_mgr { struct xe_ttm_vram_mgr base; -- cgit v1.2.3 From 65369b8e29613682b3c0484e0630d8500b421132 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 9 Oct 2025 23:02:30 +0000 Subject: drm/xe: Change return type of detect_bar2_dgfx() from s64 to u64 The function never returns a negative value, and the return value is assigned to a u64 variable. Use u64 for better type correctness and clarity. v2: add assert to catch theoretical negative or zero stolen size. 
(Rodrigo) Cc: Rodrigo Vivi Signed-off-by: Shuicheng Lin Link: https://lore.kernel.org/r/20251009230239.2830207-8-shuicheng.lin@intel.com Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 9b00bdc8ef1a..e368b2a36bac 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -81,7 +81,7 @@ static u32 get_wopcm_size(struct xe_device *xe) return wopcm_size; } -static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) +static u64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct xe_vram_region *tile_vram = xe_device_get_root_tile(xe)->mem.vram; resource_size_t tile_io_start = xe_vram_region_io_start(tile_vram); @@ -105,6 +105,8 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) return 0; stolen_size = tile_size - mgr->stolen_base; + + xe_assert(xe, stolen_size > wopcm_size); stolen_size -= wopcm_size; /* Verify usage fits in the actual resource available */ -- cgit v1.2.3 From 1d3ad183943b38eec2acf72a0ae98e635dc8456b Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Tue, 30 Sep 2025 16:58:10 +0530 Subject: ext4: detect invalid INLINE_DATA + EXTENTS flag combination syzbot reported a BUG_ON in ext4_es_cache_extent() when opening a verity file on a corrupted ext4 filesystem mounted without a journal. The issue is that the filesystem has an inode with both the INLINE_DATA and EXTENTS flags set: EXT4-fs error (device loop0): ext4_cache_extents:545: inode #15: comm syz.0.17: corrupted extent tree: lblk 0 < prev 66 Investigation revealed that the inode has both flags set: DEBUG: inode 15 - flag=1, i_inline_off=164, has_inline=1, extents_flag=1 This is an invalid combination since an inode should have either: - INLINE_DATA: data stored directly in the inode - EXTENTS: data stored in extent-mapped blocks Having both flags causes ext4_has_inline_data() to return true, skipping extent tree validation in __ext4_iget(). The unvalidated out-of-order extents then trigger a BUG_ON in ext4_es_cache_extent() due to integer underflow when calculating hole sizes. Fix this by detecting this invalid flag combination early in ext4_iget() and rejecting the corrupted inode. 
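As an illustration of the invariant being enforced, here is a minimal standalone sketch using the canonical ext4 on-disk flag values; the helper name and the raw-flag form are assumptions for clarity (the actual check in the diff below uses ext4_test_inode_flag() on the in-memory inode):

    #include <stdbool.h>
    #include <stdint.h>

    /* Canonical ext4 on-disk inode flag values. */
    #define EXT4_EXTENTS_FL     0x00080000  /* data is extent-mapped */
    #define EXT4_INLINE_DATA_FL 0x10000000  /* data is stored inline in the inode */

    /* An inode's data lives either inline or in extents, never both. */
    static bool ext4_inode_flags_consistent(uint32_t i_flags)
    {
            return !((i_flags & EXT4_INLINE_DATA_FL) &&
                     (i_flags & EXT4_EXTENTS_FL));
    }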
Cc: stable@kernel.org Reported-and-tested-by: syzbot+038b7bf43423e132b308@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=038b7bf43423e132b308 Suggested-by: Zhang Yi Signed-off-by: Deepanshu Kartikey Reviewed-by: Zhang Yi Message-ID: <20250930112810.315095-1-kartikey406@gmail.com> Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f9e4ac87211e..e99306a8f47c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5319,6 +5319,14 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, } ei->i_flags = le32_to_cpu(raw_inode->i_flags); ext4_set_inode_flags(inode, true); + /* Detect invalid flag combination - can't have both inline data and extents */ + if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) && + ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { + ext4_error_inode(inode, function, line, 0, + "inode has both inline data and extents flags"); + ret = -EFSCORRUPTED; + goto bad_inode; + } inode->i_blocks = ext4_inode_blocks(raw_inode, ei); ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); if (ext4_has_feature_64bit(sb)) -- cgit v1.2.3 From 971843c511c3c2f6eda96c6b03442913bfee6148 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 7 Oct 2025 15:49:37 +0200 Subject: ext4: free orphan info with kvfree Orphan info is now getting allocated with kvmalloc_array(). Free it with kvfree() instead of kfree() to avoid complaints from mm. Reported-by: Chris Mason Fixes: 0a6ce20c1564 ("ext4: verify orphan file size is not too big") Cc: stable@vger.kernel.org Signed-off-by: Jan Kara Message-ID: <20251007134936.7291-2-jack@suse.cz> Signed-off-by: Theodore Ts'o --- fs/ext4/orphan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c index 33c3a89396b1..82d5e7501455 100644 --- a/fs/ext4/orphan.c +++ b/fs/ext4/orphan.c @@ -513,7 +513,7 @@ void ext4_release_orphan_info(struct super_block *sb) return; for (i = 0; i < oi->of_blocks; i++) brelse(oi->of_binfo[i].ob_bh); - kfree(oi->of_binfo); + kvfree(oi->of_binfo); } static struct ext4_orphan_block_tail *ext4_orphan_block_tail( @@ -637,7 +637,7 @@ int ext4_init_orphan_info(struct super_block *sb) out_free: for (i--; i >= 0; i--) brelse(oi->of_binfo[i].ob_bh); - kfree(oi->of_binfo); + kvfree(oi->of_binfo); out_put: iput(inode); return ret; -- cgit v1.2.3 From 034417c1439a533a315562a57bd340d963eaac6b Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Fri, 10 Oct 2025 08:52:39 +0800 Subject: KVM: x86/pmu: Don't try to get perf capabilities for hybrid CPUs Explicitly zero kvm_host_pmu instead of attempting to get the perf PMU capabilities when running on a hybrid CPU to avoid running afoul of perf's sanity check. ------------[ cut here ]------------ WARNING: arch/x86/events/core.c:3089 at perf_get_x86_pmu_capability+0xd/0xc0, Call Trace: kvm_x86_vendor_init+0x1b0/0x1a40 [kvm] vmx_init+0xdb/0x260 [kvm_intel] vt_init+0x12/0x9d0 [kvm_intel] do_one_initcall+0x60/0x3f0 do_init_module+0x97/0x2b0 load_module+0x2d08/0x2e30 init_module_from_file+0x96/0xe0 idempotent_init_module+0x117/0x330 __x64_sys_finit_module+0x73/0xe0 Always read the capabilities for non-hybrid CPUs, i.e. don't entirely revert to reading capabilities if and only if KVM wants to use a PMU, as it may be useful to have the host PMU capabilities available, e.g. if only for debug.
Reported-by: Chaitanya Kumar Borah Closes: https://lore.kernel.org/all/70b64347-2aca-4511-af78-a767d5fa8226@intel.com/ Fixes: 51f34b1e650f ("KVM: x86/pmu: Snapshot host (i.e. perf's) reported PMU capabilities") Suggested-by: Sean Christopherson Signed-off-by: Dapeng Mi Link: https://lore.kernel.org/r/20251010005239.146953-1-dapeng1.mi@linux.intel.com [sean: rework changelog, call out hybrid CPUs in shortlog] Signed-off-by: Sean Christopherson --- arch/x86/kvm/pmu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 40ac4cb44ed2..487ad19a236e 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -108,16 +108,18 @@ void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops) bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL; int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS; - perf_get_x86_pmu_capability(&kvm_host_pmu); - /* * Hybrid PMUs don't play nice with virtualization without careful * configuration by userspace, and KVM's APIs for reporting supported * vPMU features do not account for hybrid PMUs. Disable vPMU support * for hybrid PMUs until KVM gains a way to let userspace opt-in. */ - if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { enable_pmu = false; + memset(&kvm_host_pmu, 0, sizeof(kvm_host_pmu)); + } else { + perf_get_x86_pmu_capability(&kvm_host_pmu); + } if (enable_pmu) { /* -- cgit v1.2.3 From d2042d8f96ddefdeee823737f813efe3ab4b4e8d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:54 -0700 Subject: KVM: Rework KVM_CAP_GUEST_MEMFD_MMAP into KVM_CAP_GUEST_MEMFD_FLAGS Rework the not-yet-released KVM_CAP_GUEST_MEMFD_MMAP into a more generic KVM_CAP_GUEST_MEMFD_FLAGS capability so that adding new flags doesn't require a new capability, and so that developers aren't tempted to bundle multiple flags into a single capability. Note, kvm_vm_ioctl_check_extension_generic() can only return a 32-bit value, but that limitation can be easily circumvented by adding e.g. KVM_CAP_GUEST_MEMFD_FLAGS2 in the unlikely event guest_memfd supports more than 32 flags. Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-2-seanjc@google.com Signed-off-by: Sean Christopherson --- Documentation/virt/kvm/api.rst | 10 +++++++--- include/uapi/linux/kvm.h | 2 +- tools/testing/selftests/kvm/guest_memfd_test.c | 13 ++++++------- virt/kvm/kvm_main.c | 7 +++++-- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 6ae24c5ca559..7ba92f2ced38 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6432,9 +6432,13 @@ most one mapping per page, i.e. binding multiple memory regions to a single guest_memfd range is not allowed (any number of memory regions can be bound to a single guest_memfd file, but the bound ranges must not overlap). -When the capability KVM_CAP_GUEST_MEMFD_MMAP is supported, the 'flags' field -supports GUEST_MEMFD_FLAG_MMAP. Setting this flag on guest_memfd creation -enables mmap() and faulting of guest_memfd memory to host userspace. +The capability KVM_CAP_GUEST_MEMFD_FLAGS enumerates the `flags` that can be +specified via KVM_CREATE_GUEST_MEMFD. Currently defined flags: + + ============================ ================================================ + GUEST_MEMFD_FLAG_MMAP Enable using mmap() on the guest_memfd file + descriptor. 
+ ============================ ================================================ When the KVM MMU performs a PFN lookup to service a guest fault and the backing guest_memfd has the GUEST_MEMFD_FLAG_MMAP set, then the fault will always be diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6efa98a57ec1..b1d52d0c56ec 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -962,7 +962,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 -#define KVM_CAP_GUEST_MEMFD_MMAP 244 +#define KVM_CAP_GUEST_MEMFD_FLAGS 244 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index b3ca6737f304..3e58bd496104 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -262,19 +262,17 @@ static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags) static void test_guest_memfd(unsigned long vm_type) { - uint64_t flags = 0; struct kvm_vm *vm; size_t total_size; size_t page_size; + uint64_t flags; int fd; page_size = getpagesize(); total_size = page_size * 4; vm = vm_create_barebones_type(vm_type); - - if (vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP)) - flags |= GUEST_MEMFD_FLAG_MMAP; + flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); test_create_guest_memfd_multiple(vm); test_create_guest_memfd_invalid_sizes(vm, flags, page_size); @@ -328,13 +326,14 @@ static void test_guest_memfd_guest(void) size_t size; int fd, i; - if (!kvm_has_cap(KVM_CAP_GUEST_MEMFD_MMAP)) + if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS)) return; vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code); - TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP), - "Default VM type should always support guest_memfd mmap()"); + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP, + "Default VM type should support MMAP, supported flags = 0x%x", + vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); size = vm->page_size; fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 226faeaa8e56..e3a268757621 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4928,8 +4928,11 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_KVM_GUEST_MEMFD case KVM_CAP_GUEST_MEMFD: return 1; - case KVM_CAP_GUEST_MEMFD_MMAP: - return !kvm || kvm_arch_supports_gmem_mmap(kvm); + case KVM_CAP_GUEST_MEMFD_FLAGS: + if (!kvm || kvm_arch_supports_gmem_mmap(kvm)) + return GUEST_MEMFD_FLAG_MMAP; + + return 0; #endif default: break; -- cgit v1.2.3 From fe2bf6234e947bf5544db6d386af1df2a8db80f3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:55 -0700 Subject: KVM: guest_memfd: Add INIT_SHARED flag, reject user page faults if not set Add a guest_memfd flag to allow userspace to state that the underlying memory should be configured to be initialized as shared, and reject user page faults if the guest_memfd instance's memory isn't shared. Because KVM doesn't yet support in-place private<=>shared conversions, all guest_memfd memory effectively follows the initial state. Alternatively, KVM could deduce the initial state based on MMAP, which for all intents and purposes is what KVM currently does. 
However, implicitly deriving the default state based on MMAP will result in a messy ABI when support for in-place conversions is added. For x86 CoCo VMs, which don't yet support MMAP, memory is currently private by default (otherwise the memory would be unusable). If MMAP implies memory is shared by default, then the default state for CoCo VMs will vary based on MMAP, and from userspace's perspective, will change when in-place conversion support is added. I.e. to maintain guest<=>host ABI, userspace would need to immediately convert all memory from shared=>private, which is both ugly and inefficient. The inefficiency could be avoided by adding a flag to state that memory is _private_ by default, irrespective of MMAP, but that would lead to an equally messy and hard to document ABI. Bite the bullet and immediately add a flag to control the default state so that the effective behavior is explicit and straightforward. Fixes: 3d3a04fad25a ("KVM: Allow and advertise support for host mmap() on guest_memfd files") Cc: David Hildenbrand Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-3-seanjc@google.com Signed-off-by: Sean Christopherson --- Documentation/virt/kvm/api.rst | 5 +++++ include/uapi/linux/kvm.h | 3 ++- tools/testing/selftests/kvm/guest_memfd_test.c | 15 ++++++++++++--- virt/kvm/guest_memfd.c | 6 +++++- virt/kvm/kvm_main.c | 3 ++- 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 7ba92f2ced38..754b662a453c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6438,6 +6438,11 @@ specified via KVM_CREATE_GUEST_MEMFD. Currently defined flags: ============================ ================================================ GUEST_MEMFD_FLAG_MMAP Enable using mmap() on the guest_memfd file descriptor. + GUEST_MEMFD_FLAG_INIT_SHARED Make all memory in the file shared during + KVM_CREATE_GUEST_MEMFD (memory files created + without INIT_SHARED will be marked private). + Shared memory can be faulted into host userspace + page tables. Private memory cannot. 
============================ ================================================ When the KVM MMU performs a PFN lookup to service a guest fault and the backing diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b1d52d0c56ec..52f6000ab020 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1599,7 +1599,8 @@ struct kvm_memory_attributes { #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) -#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) +#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) +#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1) struct kvm_create_guest_memfd { __u64 size; diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 3e58bd496104..0de56ce3c4e2 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -239,8 +239,9 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) close(fd1); } -static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags) +static void test_guest_memfd_flags(struct kvm_vm *vm) { + uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); size_t page_size = getpagesize(); uint64_t flag; int fd; @@ -274,6 +275,10 @@ static void test_guest_memfd(unsigned long vm_type) vm = vm_create_barebones_type(vm_type); flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + /* This test doesn't yet support testing mmap() on private memory. */ + if (!(flags & GUEST_MEMFD_FLAG_INIT_SHARED)) + flags &= ~GUEST_MEMFD_FLAG_MMAP; + test_create_guest_memfd_multiple(vm); test_create_guest_memfd_invalid_sizes(vm, flags, page_size); @@ -292,7 +297,7 @@ static void test_guest_memfd(unsigned long vm_type) test_fallocate(fd, page_size, total_size); test_invalid_punch_hole(fd, page_size, total_size); - test_guest_memfd_flags(vm, flags); + test_guest_memfd_flags(vm); close(fd); kvm_vm_free(vm); @@ -334,9 +339,13 @@ static void test_guest_memfd_guest(void) TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP, "Default VM type should support MMAP, supported flags = 0x%x", vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED, + "Default VM type should support INIT_SHARED, supported flags = 0x%x", + vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); size = vm->page_size; - fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP); + fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED); vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0); mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 94bafd6c558c..cf3afba23a6b 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -328,6 +328,9 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf) if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) return VM_FAULT_SIGBUS; + if (!((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED)) + return VM_FAULT_SIGBUS; + folio = kvm_gmem_get_folio(inode, vmf->pgoff); if (IS_ERR(folio)) { int err = PTR_ERR(folio); @@ -525,7 +528,8 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) u64 valid_flags = 0; if (kvm_arch_supports_gmem_mmap(kvm)) - valid_flags |= GUEST_MEMFD_FLAG_MMAP; + valid_flags |= GUEST_MEMFD_FLAG_MMAP | + 
GUEST_MEMFD_FLAG_INIT_SHARED; if (flags & ~valid_flags) return -EINVAL; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e3a268757621..5f644ca54af3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4930,7 +4930,8 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) return 1; case KVM_CAP_GUEST_MEMFD_FLAGS: if (!kvm || kvm_arch_supports_gmem_mmap(kvm)) - return GUEST_MEMFD_FLAG_MMAP; + return GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED; return 0; #endif -- cgit v1.2.3 From 5d3341d684be80892d8f6f9812f90f9274b81177 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:56 -0700 Subject: KVM: guest_memfd: Invalidate SHARED GPAs if gmem supports INIT_SHARED When invalidating gmem ranges, e.g. in response to PUNCH_HOLE, process all possible range types (PRIVATE vs. SHARED) for the gmem instance. Since guest_memfd doesn't yet support in-place conversions, simply pivot on INIT_SHARED as a gmem instance can currently only have private or shared memory, not both. Failure to mark shared GPAs for invalidation is benign in the current code base, as only x86's TDX consumes KVM_FILTER_{PRIVATE,SHARED}, and TDX doesn't yet support INIT_SHARED with guest_memfd. However, invalidating only private GPAs is conceptually wrong and a lurking bug, e.g. could result in missed invalidations if ARM starts filtering invalidations based on attributes. Fixes: 3d3a04fad25a ("KVM: Allow and advertise support for host mmap() on guest_memfd files") Reviewed-by: Ackerley Tng Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-4-seanjc@google.com Signed-off-by: Sean Christopherson --- virt/kvm/guest_memfd.c | 64 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index cf3afba23a6b..e10d2c71e78c 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -102,8 +102,17 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) return filemap_grab_folio(inode->i_mapping, index); } -static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, - pgoff_t end) +static enum kvm_gfn_range_filter kvm_gmem_get_invalidate_filter(struct inode *inode) +{ + if ((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED) + return KVM_FILTER_SHARED; + + return KVM_FILTER_PRIVATE; +} + +static void __kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, + pgoff_t end, + enum kvm_gfn_range_filter attr_filter) { bool flush = false, found_memslot = false; struct kvm_memory_slot *slot; @@ -118,8 +127,7 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, .end = slot->base_gfn + min(pgoff + slot->npages, end) - pgoff, .slot = slot, .may_block = true, - /* guest memfd is relevant to only private mappings.
*/ - .attr_filter = KVM_FILTER_PRIVATE, + .attr_filter = attr_filter, }; if (!found_memslot) { @@ -139,8 +147,21 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, KVM_MMU_UNLOCK(kvm); } -static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, - pgoff_t end) +static void kvm_gmem_invalidate_begin(struct inode *inode, pgoff_t start, + pgoff_t end) +{ + struct list_head *gmem_list = &inode->i_mapping->i_private_list; + enum kvm_gfn_range_filter attr_filter; + struct kvm_gmem *gmem; + + attr_filter = kvm_gmem_get_invalidate_filter(inode); + + list_for_each_entry(gmem, gmem_list, entry) + __kvm_gmem_invalidate_begin(gmem, start, end, attr_filter); +} + +static void __kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, + pgoff_t end) { struct kvm *kvm = gmem->kvm; @@ -151,12 +172,20 @@ static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, } } -static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len) +static void kvm_gmem_invalidate_end(struct inode *inode, pgoff_t start, + pgoff_t end) { struct list_head *gmem_list = &inode->i_mapping->i_private_list; + struct kvm_gmem *gmem; + + list_for_each_entry(gmem, gmem_list, entry) + __kvm_gmem_invalidate_end(gmem, start, end); +} + +static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len) +{ pgoff_t start = offset >> PAGE_SHIFT; pgoff_t end = (offset + len) >> PAGE_SHIFT; - struct kvm_gmem *gmem; /* * Bindings must be stable across invalidation to ensure the start+end @@ -164,13 +193,11 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len) */ filemap_invalidate_lock(inode->i_mapping); - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_begin(gmem, start, end); + kvm_gmem_invalidate_begin(inode, start, end); truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1); - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_end(gmem, start, end); + kvm_gmem_invalidate_end(inode, start, end); filemap_invalidate_unlock(inode->i_mapping); @@ -280,8 +307,9 @@ static int kvm_gmem_release(struct inode *inode, struct file *file) * Zap all SPTEs pointed at by this file. Do not free the backing * memory, as its lifetime is associated with the inode, not the file. */ - kvm_gmem_invalidate_begin(gmem, 0, -1ul); - kvm_gmem_invalidate_end(gmem, 0, -1ul); + __kvm_gmem_invalidate_begin(gmem, 0, -1ul, + kvm_gmem_get_invalidate_filter(inode)); + __kvm_gmem_invalidate_end(gmem, 0, -1ul); list_del(&gmem->entry); @@ -403,8 +431,6 @@ static int kvm_gmem_migrate_folio(struct address_space *mapping, static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *folio) { - struct list_head *gmem_list = &mapping->i_private_list; - struct kvm_gmem *gmem; pgoff_t start, end; filemap_invalidate_lock_shared(mapping); @@ -412,8 +438,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol start = folio->index; end = start + folio_nr_pages(folio); - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_begin(gmem, start, end); + kvm_gmem_invalidate_begin(mapping->host, start, end); /* * Do not truncate the range, what action is taken in response to the @@ -424,8 +449,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol * error to userspace. 
*/ - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_end(gmem, start, end); + kvm_gmem_invalidate_end(mapping->host, start, end); filemap_invalidate_unlock_shared(mapping); -- cgit v1.2.3 From 9aef71c892a55e004419923ba7129abe3e58d9f1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:57 -0700 Subject: KVM: Explicitly mark KVM_GUEST_MEMFD as depending on KVM_GENERIC_MMU_NOTIFIER Add KVM_GENERIC_MMU_NOTIFIER as a dependency for selecting KVM_GUEST_MEMFD, as guest_memfd relies on kvm_mmu_invalidate_{begin,end}(), which are defined if and only if the generic mmu_notifier implementation is enabled. The missing dependency is currently benign as s390 is the only KVM arch that doesn't utilize the generic mmu_notifier infrastructure, and s390 doesn't currently support guest_memfd. Fixes: a7800aa80ea4 ("KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory") Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-5-seanjc@google.com Signed-off-by: Sean Christopherson --- virt/kvm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 1b7d5be0b6c4..a01cc5743137 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -113,6 +113,7 @@ config KVM_GENERIC_MEMORY_ATTRIBUTES bool config KVM_GUEST_MEMFD + depends on KVM_GENERIC_MMU_NOTIFIER select XARRAY_MULTI bool -- cgit v1.2.3 From 44c6cb9fe9888b371e31165b2854bd0f4e2787d4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:58 -0700 Subject: KVM: guest_memfd: Allow mmap() on guest_memfd for x86 VMs with private memory Allow mmap() on guest_memfd instances for x86 VMs with private memory as the need to track private vs. shared state in the guest_memfd instance is only pertinent to INIT_SHARED. Doing mmap() on private memory isn't terribly useful (yet!), but it's now possible, and will be desirable when guest_memfd gains support for other VMA-based syscalls, e.g. mbind() to set NUMA policy. Lift the restriction now, before MMAP support is officially released, so that KVM doesn't need to add another capability to enumerate support for mmap() on private memory. Fixes: 3d3a04fad25a ("KVM: Allow and advertise support for host mmap() on guest_memfd files") Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 7 ++++--- include/linux/kvm_host.h | 12 +++++++++++- virt/kvm/guest_memfd.c | 9 ++------- virt/kvm/kvm_main.c | 6 +----- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4b8138bd4857..fe3dc3eb4331 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13942,10 +13942,11 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) #ifdef CONFIG_KVM_GUEST_MEMFD /* - * KVM doesn't yet support mmap() on guest_memfd for VMs with private memory - * (the private vs. shared tracking needs to be moved into guest_memfd). + * KVM doesn't yet support initializing guest_memfd memory as shared for VMs + * with private memory (the private vs. shared tracking needs to be moved into + * guest_memfd).
*/ -bool kvm_arch_supports_gmem_mmap(struct kvm *kvm) +bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm) { return !kvm_arch_has_private_mem(kvm); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 19b8c4bebb9c..680ca838f018 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -729,7 +729,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) #endif #ifdef CONFIG_KVM_GUEST_MEMFD -bool kvm_arch_supports_gmem_mmap(struct kvm *kvm); +bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm); + +static inline u64 kvm_gmem_get_supported_flags(struct kvm *kvm) +{ + u64 flags = GUEST_MEMFD_FLAG_MMAP; + + if (!kvm || kvm_arch_supports_gmem_init_shared(kvm)) + flags |= GUEST_MEMFD_FLAG_INIT_SHARED; + + return flags; +} #endif #ifndef kvm_arch_has_readonly_mem diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index e10d2c71e78c..fbca8c0972da 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -485,7 +485,7 @@ static const struct inode_operations kvm_gmem_iops = { .setattr = kvm_gmem_setattr, }; -bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm) +bool __weak kvm_arch_supports_gmem_init_shared(struct kvm *kvm) { return true; } @@ -549,13 +549,8 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) { loff_t size = args->size; u64 flags = args->flags; - u64 valid_flags = 0; - if (kvm_arch_supports_gmem_mmap(kvm)) - valid_flags |= GUEST_MEMFD_FLAG_MMAP | - GUEST_MEMFD_FLAG_INIT_SHARED; - - if (flags & ~valid_flags) + if (flags & ~kvm_gmem_get_supported_flags(kvm)) return -EINVAL; if (size <= 0 || !PAGE_ALIGNED(size)) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5f644ca54af3..b7a0ae2a7b20 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4929,11 +4929,7 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_GUEST_MEMFD: return 1; case KVM_CAP_GUEST_MEMFD_FLAGS: - if (!kvm || kvm_arch_supports_gmem_mmap(kvm)) - return GUEST_MEMFD_FLAG_MMAP | - GUEST_MEMFD_FLAG_INIT_SHARED; - - return 0; + return kvm_gmem_get_supported_flags(kvm); #endif default: break; -- cgit v1.2.3 From 3a6c08538c742624c60cb6a53dd61eb025e0d1e1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:59 -0700 Subject: KVM: selftests: Stash the host page size in a global in the guest_memfd test Use a global variable to track the host page size in the guest_memfd test so that the information doesn't need to be constantly passed around. The state is purely a reflection of the underlying system, i.e. can't be set by the test and is constant for a given invocation of the test, and thus explicitly passing the host page size to individual testcases adds no value, e.g. doesn't allow testing different combinations. Making page_size a global will simplify an upcoming change to create a new guest_memfd instance per testcase. No functional change intended. 
Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: David Hildenbrand Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Link: https://lore.kernel.org/r/20251003232606.4070510-7-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/guest_memfd_test.c | 37 +++++++++++++------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 0de56ce3c4e2..a7c9601bd31e 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -24,6 +24,8 @@ #include "test_util.h" #include "ucall_common.h" +static size_t page_size; + static void test_file_read_write(int fd) { char buf[64]; @@ -38,7 +40,7 @@ static void test_file_read_write(int fd) "pwrite on a guest_mem fd should fail"); } -static void test_mmap_supported(int fd, size_t page_size, size_t total_size) +static void test_mmap_supported(int fd, size_t total_size) { const char val = 0xaa; char *mem; @@ -78,7 +80,7 @@ void fault_sigbus_handler(int signum) siglongjmp(jmpbuf, 1); } -static void test_fault_overflow(int fd, size_t page_size, size_t total_size) +static void test_fault_overflow(int fd, size_t total_size) { struct sigaction sa_old, sa_new = { .sa_handler = fault_sigbus_handler, @@ -106,7 +108,7 @@ static void test_fault_overflow(int fd, size_t page_size, size_t total_size) TEST_ASSERT(!ret, "munmap() should succeed."); } -static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size) +static void test_mmap_not_supported(int fd, size_t total_size) { char *mem; @@ -117,7 +119,7 @@ static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size) TEST_ASSERT_EQ(mem, MAP_FAILED); } -static void test_file_size(int fd, size_t page_size, size_t total_size) +static void test_file_size(int fd, size_t total_size) { struct stat sb; int ret; @@ -128,7 +130,7 @@ static void test_file_size(int fd, size_t page_size, size_t total_size) TEST_ASSERT_EQ(sb.st_blksize, page_size); } -static void test_fallocate(int fd, size_t page_size, size_t total_size) +static void test_fallocate(int fd, size_t total_size) { int ret; @@ -165,7 +167,7 @@ static void test_fallocate(int fd, size_t page_size, size_t total_size) TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed"); } -static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) +static void test_invalid_punch_hole(int fd, size_t total_size) { struct { off_t offset; @@ -196,8 +198,7 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) } static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm, - uint64_t guest_memfd_flags, - size_t page_size) + uint64_t guest_memfd_flags) { size_t size; int fd; @@ -214,7 +215,6 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) { int fd1, fd2, ret; struct stat st1, st2; - size_t page_size = getpagesize(); fd1 = __vm_create_guest_memfd(vm, page_size, 0); TEST_ASSERT(fd1 != -1, "memfd creation should succeed"); @@ -242,7 +242,6 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) static void test_guest_memfd_flags(struct kvm_vm *vm) { uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); - size_t page_size = getpagesize(); uint64_t flag; int fd; @@ -265,11 +264,9 @@ static void test_guest_memfd(unsigned long vm_type) { struct kvm_vm *vm; size_t total_size; - size_t page_size; uint64_t flags; int fd; - page_size = getpagesize(); 
total_size = page_size * 4; vm = vm_create_barebones_type(vm_type); @@ -280,22 +277,22 @@ static void test_guest_memfd(unsigned long vm_type) flags &= ~GUEST_MEMFD_FLAG_MMAP; test_create_guest_memfd_multiple(vm); - test_create_guest_memfd_invalid_sizes(vm, flags, page_size); + test_create_guest_memfd_invalid_sizes(vm, flags); fd = vm_create_guest_memfd(vm, total_size, flags); test_file_read_write(fd); if (flags & GUEST_MEMFD_FLAG_MMAP) { - test_mmap_supported(fd, page_size, total_size); - test_fault_overflow(fd, page_size, total_size); + test_mmap_supported(fd, total_size); + test_fault_overflow(fd, total_size); } else { - test_mmap_not_supported(fd, page_size, total_size); + test_mmap_not_supported(fd, total_size); } - test_file_size(fd, page_size, total_size); - test_fallocate(fd, page_size, total_size); - test_invalid_punch_hole(fd, page_size, total_size); + test_file_size(fd, total_size); + test_fallocate(fd, total_size); + test_invalid_punch_hole(fd, total_size); test_guest_memfd_flags(vm); @@ -374,6 +371,8 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + page_size = getpagesize(); + /* * Not all architectures support KVM_CAP_VM_TYPES. However, those that * support guest_memfd have that support for the default VM type. -- cgit v1.2.3 From 21d602ed616aebae4de568be55db65a1f5a3be10 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:26:00 -0700 Subject: KVM: selftests: Create a new guest_memfd for each testcase Refactor the guest_memfd selftest to improve test isolation by creating a new guest_memfd for each testcase. Currently, the test reuses a single guest_memfd instance for all testcases, and thus creates dependencies between tests, e.g. not truncating folios from the guest_memfd instance at the end of a test could lead to unexpected results (see the PUNCH_HOLE purging that needs to be done by the in-flight NUMA testcases[1]). Invoke each test via a macro wrapper to create and close a guest_memfd to cut down on the boilerplate copy+paste needed to create a test.
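The per-testcase fixture pattern described above is implemented by the wrapper in the diff below; sketched with comments, it amounts to:

    /*
     * Create a fresh 4-page guest_memfd for the testcase, run the test
     * body against it, then close the fd so that no folios or other state
     * leak into the next testcase.
     */
    #define gmem_test(__test, __vm, __flags)                                \
    do {                                                                    \
            int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags);  \
                                                                            \
            test_##__test(fd, page_size * 4);                               \
            close(fd);                                                      \
    } while (0)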
Link: https://lore.kernel.org/all/20250827175247.83322-10-shivankg@amd.com Reported-by: Ackerley Tng Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: David Hildenbrand Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Link: https://lore.kernel.org/r/20251003232606.4070510-8-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/guest_memfd_test.c | 31 +++++++++++++------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index a7c9601bd31e..afdc4d3a956d 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -26,7 +26,7 @@ static size_t page_size; -static void test_file_read_write(int fd) +static void test_file_read_write(int fd, size_t total_size) { char buf[64]; @@ -260,14 +260,18 @@ static void test_guest_memfd_flags(struct kvm_vm *vm) } } +#define gmem_test(__test, __vm, __flags) \ +do { \ + int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags); \ + \ + test_##__test(fd, page_size * 4); \ + close(fd); \ +} while (0) + static void test_guest_memfd(unsigned long vm_type) { struct kvm_vm *vm; - size_t total_size; uint64_t flags; - int fd; - - total_size = page_size * 4; vm = vm_create_barebones_type(vm_type); flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); @@ -279,24 +283,21 @@ static void test_guest_memfd(unsigned long vm_type) test_create_guest_memfd_multiple(vm); test_create_guest_memfd_invalid_sizes(vm, flags); - fd = vm_create_guest_memfd(vm, total_size, flags); - - test_file_read_write(fd); + gmem_test(file_read_write, vm, flags); if (flags & GUEST_MEMFD_FLAG_MMAP) { - test_mmap_supported(fd, total_size); - test_fault_overflow(fd, total_size); + gmem_test(mmap_supported, vm, flags); + gmem_test(fault_overflow, vm, flags); } else { - test_mmap_not_supported(fd, total_size); + gmem_test(mmap_not_supported, vm, flags); } - test_file_size(fd, total_size); - test_fallocate(fd, total_size); - test_invalid_punch_hole(fd, total_size); + gmem_test(file_size, vm, flags); + gmem_test(fallocate, vm, flags); + gmem_test(invalid_punch_hole, vm, flags); test_guest_memfd_flags(vm); - close(fd); kvm_vm_free(vm); } -- cgit v1.2.3 From df0d9923f7055169cb01b050236e5a9a2b36db02 Mon Sep 17 00:00:00 2001 From: Ackerley Tng Date: Fri, 3 Oct 2025 16:26:01 -0700 Subject: KVM: selftests: Add test coverage for guest_memfd without GUEST_MEMFD_FLAG_MMAP If a VM type supports KVM_CAP_GUEST_MEMFD_MMAP, the guest_memfd test will run all test cases with GUEST_MEMFD_FLAG_MMAP set. This leaves the code path for creating a non-mmap()-able guest_memfd on a VM that supports mappable guest memfds untested. Refactor the test to run the main test suite with a given set of flags. Then, for VM types that support the mappable capability, invoke the test suite twice: once with no flags, and once with GUEST_MEMFD_FLAG_MMAP set. This ensures both creation paths are properly exercised on capable VMs. Run test_guest_memfd_flags() only once per VM type since it depends only on the set of valid/supported flags, i.e. iterating over an arbitrary set of flags is both unnecessary and wrong. 
Signed-off-by: Ackerley Tng [sean: use double-underscores for the inner helper] Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-9-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/guest_memfd_test.c | 27 +++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index afdc4d3a956d..9f98a067ab51 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -268,18 +268,8 @@ do { \ close(fd); \ } while (0) -static void test_guest_memfd(unsigned long vm_type) +static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) { - struct kvm_vm *vm; - uint64_t flags; - - vm = vm_create_barebones_type(vm_type); - flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); - - /* This test doesn't yet support testing mmap() on private memory. */ - if (!(flags & GUEST_MEMFD_FLAG_INIT_SHARED)) - flags &= ~GUEST_MEMFD_FLAG_MMAP; - test_create_guest_memfd_multiple(vm); test_create_guest_memfd_invalid_sizes(vm, flags); @@ -295,9 +285,24 @@ static void test_guest_memfd(unsigned long vm_type) gmem_test(file_size, vm, flags); gmem_test(fallocate, vm, flags); gmem_test(invalid_punch_hole, vm, flags); +} + +static void test_guest_memfd(unsigned long vm_type) +{ + struct kvm_vm *vm = vm_create_barebones_type(vm_type); + uint64_t flags; test_guest_memfd_flags(vm); + __test_guest_memfd(vm, 0); + + flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + + /* MMAP should always be supported if INIT_SHARED is supported. */ + if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) + __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED); + kvm_vm_free(vm); } -- cgit v1.2.3 From 61cee97f40180312dcca9580a5be0b0aa2217f6e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:26:02 -0700 Subject: KVM: selftests: Add wrappers for mmap() and munmap() to assert success Add and use wrappers for mmap() and munmap() that assert success to reduce a significant amount of boilerplate code, to ensure all tests assert on failure, and to provide consistent error messages on failure. No functional change intended. 
Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: David Hildenbrand Reviewed-by: Ackerley Tng Link: https://lore.kernel.org/r/20251003232606.4070510-10-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/guest_memfd_test.c | 21 ++++------- tools/testing/selftests/kvm/include/kvm_util.h | 25 ++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 44 ++++++++-------------- tools/testing/selftests/kvm/mmu_stress_test.c | 5 +-- tools/testing/selftests/kvm/s390/ucontrol_test.c | 16 ++++---- .../testing/selftests/kvm/set_memory_region_test.c | 17 ++++----- 6 files changed, 64 insertions(+), 64 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 9f98a067ab51..319fda4f5d53 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -50,8 +50,7 @@ static void test_mmap_supported(int fd, size_t total_size) mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); - mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed."); + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); memset(mem, val, total_size); for (i = 0; i < total_size; i++) @@ -70,8 +69,7 @@ static void test_mmap_supported(int fd, size_t total_size) for (i = 0; i < total_size; i++) TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); - ret = munmap(mem, total_size); - TEST_ASSERT(!ret, "munmap() should succeed."); + kvm_munmap(mem, total_size); } static sigjmp_buf jmpbuf; @@ -89,10 +87,8 @@ static void test_fault_overflow(int fd, size_t total_size) const char val = 0xaa; char *mem; size_t i; - int ret; - mem = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed."); + mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); sigaction(SIGBUS, &sa_new, &sa_old); if (sigsetjmp(jmpbuf, 1) == 0) { @@ -104,8 +100,7 @@ static void test_fault_overflow(int fd, size_t total_size) for (i = 0; i < total_size; i++) TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); - ret = munmap(mem, map_size); - TEST_ASSERT(!ret, "munmap() should succeed."); + kvm_munmap(mem, map_size); } static void test_mmap_not_supported(int fd, size_t total_size) @@ -351,10 +346,9 @@ static void test_guest_memfd_guest(void) GUEST_MEMFD_FLAG_INIT_SHARED); vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0); - mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed"); + mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); memset(mem, 0xaa, size); - munmap(mem, size); + kvm_munmap(mem, size); virt_pg_map(vm, gpa, gpa); vcpu_args_set(vcpu, 2, gpa, size); @@ -362,8 +356,7 @@ static void test_guest_memfd_guest(void) TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); - mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed"); + mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); for (i = 0; i < size; i++) TEST_ASSERT_EQ(mem[i], 0xff); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 26cc30290e76..ee60dbf5208a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ 
b/tools/testing/selftests/kvm/include/kvm_util.h @@ -286,6 +286,31 @@ static inline bool kvm_has_cap(long cap) #define __KVM_SYSCALL_ERROR(_name, _ret) \ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) +static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd, + off_t offset) +{ + void *mem; + + mem = mmap(NULL, size, prot, flags, fd, offset); + TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()", + (int)(unsigned long)MAP_FAILED)); + + return mem; +} + +static inline void *kvm_mmap(size_t size, int prot, int flags, int fd) +{ + return __kvm_mmap(size, prot, flags, fd, 0); +} + +static inline void kvm_munmap(void *mem, size_t size) +{ + int ret; + + ret = munmap(mem, size); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); +} + /* * Use the "inner", double-underscore macro when reporting errors from within * other macros so that the name of ioctl() and not its literal numeric value diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6743fbd9bd67..83a721be7ec5 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -741,13 +741,11 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) int ret; if (vcpu->dirty_gfns) { - ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size); vcpu->dirty_gfns = NULL; } - ret = munmap(vcpu->run, vcpu_mmap_sz()); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->run, vcpu_mmap_sz()); ret = close(vcpu->fd); TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); @@ -783,20 +781,16 @@ void kvm_vm_release(struct kvm_vm *vmp) static void __vm_mem_region_delete(struct kvm_vm *vm, struct userspace_mem_region *region) { - int ret; - rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); rb_erase(®ion->hva_node, &vm->regions.hva_tree); hash_del(®ion->slot_node); sparsebit_free(®ion->unused_phy_pages); sparsebit_free(®ion->protected_phy_pages); - ret = munmap(region->mmap_start, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(region->mmap_start, region->mmap_size); if (region->fd >= 0) { /* There's an extra map when using shared memory. */ - ret = munmap(region->mmap_alias, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(region->mmap_alias, region->mmap_size); close(region->fd); } if (region->region.guest_memfd >= 0) @@ -1053,12 +1047,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, region->fd = kvm_memfd_alloc(region->mmap_size, src_type == VM_MEM_SRC_SHARED_HUGETLB); - region->mmap_start = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_start != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); TEST_ASSERT(!is_backing_src_hugetlb(src_type) || region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), @@ -1129,12 +1120,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, /* If shared memory, create an alias. 
*/ if (region->fd >= 0) { - region->mmap_alias = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_alias != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_alias = kvm_mmap(region->mmap_size, + PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); /* Align host alias address */ region->host_alias = align_ptr_up(region->mmap_alias, alignment); @@ -1344,10 +1333,8 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi", vcpu_mmap_sz(), sizeof(*vcpu->run)); - vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), - PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); - TEST_ASSERT(vcpu->run != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE, + MAP_SHARED, vcpu->fd); if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) vcpu->stats.fd = vcpu_get_stats_fd(vcpu); @@ -1794,9 +1781,8 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) page_size * KVM_DIRTY_LOG_PAGE_OFFSET); TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); - addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, - page_size * KVM_DIRTY_LOG_PAGE_OFFSET); - TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); + addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, + page_size * KVM_DIRTY_LOG_PAGE_OFFSET); vcpu->dirty_gfns = addr; vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn); diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 6a437d2be9fa..37b7e6524533 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -339,8 +339,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); fd = kvm_memfd_alloc(slot_size, hugepages); - mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() failed"); + mem = kvm_mmap(slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed"); @@ -413,7 +412,7 @@ int main(int argc, char *argv[]) for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2) vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL); - munmap(mem, slot_size / 2); + kvm_munmap(mem, slot_size / 2); /* Sanity check that the vCPUs actually ran. 
*/ for (i = 0; i < nr_vcpus; i++) diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c index d265b34c54be..50bc1c38225a 100644 --- a/tools/testing/selftests/kvm/s390/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -142,19 +142,17 @@ FIXTURE_SETUP(uc_kvm) self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run)) TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size)); - self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size, - PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); - ASSERT_NE(self->run, MAP_FAILED); + self->run = kvm_mmap(self->kvm_run_size, PROT_READ | PROT_WRITE, + MAP_SHARED, self->vcpu_fd); /** * For virtual cpus that have been created with S390 user controlled * virtual machines, the resulting vcpu fd can be memory mapped at page * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of * the virtual cpu's hardware control block. */ - self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE, - PROT_READ | PROT_WRITE, MAP_SHARED, - self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); - ASSERT_NE(self->sie_block, MAP_FAILED); + self->sie_block = __kvm_mmap(PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, self->vcpu_fd, + KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); TH_LOG("VM created %p %p", self->run, self->sie_block); @@ -186,8 +184,8 @@ FIXTURE_SETUP(uc_kvm) FIXTURE_TEARDOWN(uc_kvm) { - munmap(self->sie_block, PAGE_SIZE); - munmap(self->run, self->kvm_run_size); + kvm_munmap(self->sie_block, PAGE_SIZE); + kvm_munmap(self->run, self->kvm_run_size); close(self->vcpu_fd); close(self->vm_fd); close(self->kvm_fd); diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index ce3ac0fd6dfb..7fe427ff9b38 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -433,10 +433,10 @@ static void test_add_max_memory_regions(void) pr_info("Adding slots 0..%i, each memory region with %dK size\n", (max_mem_slots - 1), MEM_REGION_SIZE >> 10); - mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); - TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); + + mem = kvm_mmap((size_t)max_mem_slots * MEM_REGION_SIZE + alignment, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1); mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); for (slot = 0; slot < max_mem_slots; slot++) @@ -446,9 +446,8 @@ static void test_add_max_memory_regions(void) mem_aligned + (uint64_t)slot * MEM_REGION_SIZE); /* Check it cannot be added memory slots beyond the limit */ - mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host"); + mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); ret = __vm_set_user_memory_region(vm, max_mem_slots, 0, (uint64_t)max_mem_slots * MEM_REGION_SIZE, @@ -456,8 +455,8 @@ static void test_add_max_memory_regions(void) TEST_ASSERT(ret == -1 && errno == EINVAL, "Adding one more memory slot should fail with EINVAL"); - munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment); - munmap(mem_extra, MEM_REGION_SIZE); + kvm_munmap(mem, (size_t)max_mem_slots * 
MEM_REGION_SIZE + alignment); + kvm_munmap(mem_extra, MEM_REGION_SIZE); kvm_vm_free(vm); } -- cgit v1.2.3 From 505c953009ec8260870d41ef8109bb4c7e208e6f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:26:03 -0700 Subject: KVM: selftests: Isolate the guest_memfd Copy-on-Write negative testcase Move the guest_memfd Copy-on-Write (CoW) testcase to its own function to better separate positive testcases from negative testcases. No functional change intended. Suggested-by: Ackerley Tng Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Link: https://lore.kernel.org/r/20251003232606.4070510-11-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/guest_memfd_test.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 319fda4f5d53..640636c76eb9 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -40,6 +40,14 @@ static void test_file_read_write(int fd, size_t total_size) "pwrite on a guest_mem fd should fail"); } +static void test_mmap_cow(int fd, size_t size) +{ + void *mem; + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); +} + static void test_mmap_supported(int fd, size_t total_size) { const char val = 0xaa; @@ -47,9 +55,6 @@ static void test_mmap_supported(int fd, size_t total_size) size_t i; int ret; - mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); - TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); - mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); memset(mem, val, total_size); @@ -272,6 +277,7 @@ static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) if (flags & GUEST_MEMFD_FLAG_MMAP) { gmem_test(mmap_supported, vm, flags); + gmem_test(mmap_cow, vm, flags); gmem_test(fault_overflow, vm, flags); } else { gmem_test(mmap_not_supported, vm, flags); } -- cgit v1.2.3 From f91187c0ecc6358ccecf533c5fcc6b7dbb4735cb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:26:04 -0700 Subject: KVM: selftests: Add wrapper macro to handle and assert on expected SIGBUS Extract the guest_memfd test's SIGBUS handling functionality into a common TEST_EXPECT_SIGBUS() macro in anticipation of adding more SIGBUS testcases. Eating a SIGBUS isn't terribly difficult, but it requires a non-trivial amount of boilerplate code, and using a macro allows selftests to print out the exact action that failed to generate a SIGBUS without the developer needing to remember to add a useful error message. Explicitly mark the SIGBUS handler as "used", as gcc-14 at least likes to discard the function before linking. Opportunistically use TEST_FAIL(...) instead of TEST_ASSERT(false, ...), and fix the write path of the guest_memfd test to use the local "val" instead of hardcoding the literal value a second time.
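For reference, the sigsetjmp()/siglongjmp() dance the macro wraps can be sketched in stand-alone user-space C as follows (a minimal illustration of the pattern, not the selftest code itself; the names here are hypothetical):

    #include <setjmp.h>
    #include <signal.h>
    #include <stdio.h>

    static sigjmp_buf jmpbuf;

    static void sigbus_handler(int signum)
    {
            /* Jump back to the sigsetjmp() site, which then returns 1. */
            siglongjmp(jmpbuf, 1);
    }

    /* Run 'action' and report if it fails to raise SIGBUS. */
    #define expect_sigbus(action)                                          \
    do {                                                                   \
            struct sigaction sa_old, sa_new = {                            \
                    .sa_handler = sigbus_handler,                          \
            };                                                             \
                                                                           \
            sigaction(SIGBUS, &sa_new, &sa_old);                           \
            if (sigsetjmp(jmpbuf, 1) == 0) {                               \
                    action;                                                \
                    printf("'%s' did not trigger SIGBUS\n", #action);      \
            }                                                              \
            sigaction(SIGBUS, &sa_old, NULL);                              \
    } while (0)

Stringifying the action via #action is what lets the macro print the exact expression that failed, with no hand-written message needed.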
Suggested-by: Ackerley Tng Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: Lisa Wang Tested-by: Lisa Wang Link: https://lore.kernel.org/r/20251003232606.4070510-12-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/guest_memfd_test.c | 18 +----------------- tools/testing/selftests/kvm/include/test_util.h | 19 +++++++++++++++++++ tools/testing/selftests/kvm/lib/test_util.c | 7 +++++++ 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 640636c76eb9..73c2e54e7297 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -14,8 +14,6 @@ #include #include #include -#include -#include #include #include #include @@ -77,17 +75,8 @@ static void test_mmap_supported(int fd, size_t total_size) kvm_munmap(mem, total_size); } -static sigjmp_buf jmpbuf; -void fault_sigbus_handler(int signum) -{ - siglongjmp(jmpbuf, 1); -} - static void test_fault_overflow(int fd, size_t total_size) { - struct sigaction sa_old, sa_new = { - .sa_handler = fault_sigbus_handler, - }; size_t map_size = total_size * 4; const char val = 0xaa; char *mem; @@ -95,12 +84,7 @@ static void test_fault_overflow(int fd, size_t total_size) mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); - sigaction(SIGBUS, &sa_new, &sa_old); - if (sigsetjmp(jmpbuf, 1) == 0) { - memset(mem, 0xaa, map_size); - TEST_ASSERT(false, "memset() should have triggered SIGBUS."); - } - sigaction(SIGBUS, &sa_old, NULL); + TEST_EXPECT_SIGBUS(memset(mem, val, map_size)); for (i = 0; i < total_size; i++) TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index c6ef895fbd9a..b4872ba8ed12 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -8,6 +8,8 @@ #ifndef SELFTEST_KVM_TEST_UTIL_H #define SELFTEST_KVM_TEST_UTIL_H +#include +#include #include #include #include @@ -78,6 +80,23 @@ do { \ __builtin_unreachable(); \ } while (0) +extern sigjmp_buf expect_sigbus_jmpbuf; +void expect_sigbus_handler(int signum); + +#define TEST_EXPECT_SIGBUS(action) \ +do { \ + struct sigaction sa_old, sa_new = { \ + .sa_handler = expect_sigbus_handler, \ + }; \ + \ + sigaction(SIGBUS, &sa_new, &sa_old); \ + if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \ + action; \ + TEST_FAIL("'%s' should have triggered SIGBUS", #action); \ + } \ + sigaction(SIGBUS, &sa_old, NULL); \ +} while (0) + size_t parse_size(const char *size); int64_t timespec_to_ns(struct timespec ts); diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 03eb99af9b8d..8a1848586a85 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -18,6 +18,13 @@ #include "test_util.h" +sigjmp_buf expect_sigbus_jmpbuf; + +void __attribute__((used)) expect_sigbus_handler(int signum) +{ + siglongjmp(expect_sigbus_jmpbuf, 1); +} + /* * Random number generator that is usable from guest code. This is the * Park-Miller LCG using standard constants. 
-- cgit v1.2.3 From 19942d4fd9cf022b28a9afb432a6338dcf96fc2f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:26:05 -0700 Subject: KVM: selftests: Verify that faulting in private guest_memfd memory fails Add a guest_memfd testcase to verify that faulting in private memory gets a SIGBUS. For now, test only the case where memory is private by default since KVM doesn't yet support in-place conversion. Deliberately run the CoW test with and without INIT_SHARED set as KVM should disallow MAP_PRIVATE regardless of whether the memory itself is private from a CoCo perspective. Cc: Ackerley Tng Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: David Hildenbrand Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Link: https://lore.kernel.org/r/20251003232606.4070510-13-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/guest_memfd_test.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 73c2e54e7297..f5372fdf096d 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -75,9 +75,8 @@ static void test_mmap_supported(int fd, size_t total_size) kvm_munmap(mem, total_size); } -static void test_fault_overflow(int fd, size_t total_size) +static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size) { - size_t map_size = total_size * 4; const char val = 0xaa; char *mem; size_t i; @@ -86,12 +85,22 @@ static void test_fault_overflow(int fd, size_t total_size) TEST_EXPECT_SIGBUS(memset(mem, val, map_size)); - for (i = 0; i < total_size; i++) + for (i = 0; i < accessible_size; i++) TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); kvm_munmap(mem, map_size); } +static void test_fault_overflow(int fd, size_t total_size) +{ + test_fault_sigbus(fd, total_size, total_size * 4); +} + +static void test_fault_private(int fd, size_t total_size) +{ + test_fault_sigbus(fd, 0, total_size); +} + static void test_mmap_not_supported(int fd, size_t total_size) { char *mem; @@ -260,9 +269,14 @@ static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) gmem_test(file_read_write, vm, flags); if (flags & GUEST_MEMFD_FLAG_MMAP) { - gmem_test(mmap_supported, vm, flags); + if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) { + gmem_test(mmap_supported, vm, flags); + gmem_test(fault_overflow, vm, flags); + } else { + gmem_test(fault_private, vm, flags); + } + gmem_test(mmap_cow, vm, flags); - gmem_test(fault_overflow, vm, flags); } else { gmem_test(mmap_not_supported, vm, flags); } @@ -282,6 +296,8 @@ static void test_guest_memfd(unsigned long vm_type) __test_guest_memfd(vm, 0); flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + if (flags & GUEST_MEMFD_FLAG_MMAP) + __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP); /* MMAP should always be supported if INIT_SHARED is supported. */ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) -- cgit v1.2.3 From 505f5224b197b77169c977e747cbc18b222f85f9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:26:06 -0700 Subject: KVM: selftests: Verify that reads to inaccessible guest_memfd VMAs SIGBUS Expand the guest_memfd negative testcases for overflow and MAP_PRIVATE to verify that reads to inaccessible memory also get a SIGBUS. 
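As background on why a plain load is expected to fault: touching an mmap()ed page with no backing in the underlying object raises SIGBUS. A minimal stand-alone sketch of that behavior (hedged; standard POSIX semantics, file path hypothetical, error checking omitted for brevity):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            long page = sysconf(_SC_PAGESIZE);
            int fd = open("/tmp/sigbus-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
            char *mem;

            ftruncate(fd, page);                    /* backing object: 1 page  */
            mem = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
                       MAP_SHARED, fd, 0);          /* mapping: 2 pages        */

            mem[0] = 0xaa;                          /* in bounds: fine         */
            mem[page] = 0xaa;                       /* past EOF: raises SIGBUS */
            return 0;
    }

guest_memfd behaves analogously for ranges a VMA must not touch, whether the mapping overflows the file (the existing test) or the memory is private (the new one).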
Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: Lisa Wang Tested-by: Lisa Wang Link: https://lore.kernel.org/r/20251003232606.4070510-14-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/guest_memfd_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index f5372fdf096d..e7d9aeb418d3 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -84,6 +84,7 @@ static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size) mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); TEST_EXPECT_SIGBUS(memset(mem, val, map_size)); + TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size])); for (i = 0; i < accessible_size; i++) TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); -- cgit v1.2.3 From 852947be66b826c3d0ba328e19a3559fdf7ac726 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Fri, 10 Oct 2025 15:50:24 -0600 Subject: riscv: kprobes: convert one final __ASSEMBLY__ to __ASSEMBLER__ Per the reasoning in commit f811f58597ac ("riscv: Replace __ASSEMBLY__ with __ASSEMBLER__ in non-uapi headers"), convert one last remaining instance of __ASSEMBLY__ in the arch/riscv kprobes code. This entered the tree from patches that were sent before Thomas' changes, and when I reviewed the kprobes patches before queuing them, I missed this instance. Cc: Nam Cao Cc: Thomas Huth Link: https://lore.kernel.org/linux-riscv/16b74b63-f223-4f0b-b6e5-31cea5e620b4@redhat.com/ Link: https://lore.kernel.org/linux-riscv/20250606070952.498274-1-thuth@redhat.com/ Signed-off-by: Paul Walmsley --- arch/riscv/kernel/tests/kprobes/test-kprobes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/tests/kprobes/test-kprobes.h b/arch/riscv/kernel/tests/kprobes/test-kprobes.h index 3886ab491ecb..537f44aa9d3f 100644 --- a/arch/riscv/kernel/tests/kprobes/test-kprobes.h +++ b/arch/riscv/kernel/tests/kprobes/test-kprobes.h @@ -11,7 +11,7 @@ #define KPROBE_TEST_MAGIC_LOWER 0x0000babe #define KPROBE_TEST_MAGIC_UPPER 0xcafe0000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* array of addresses to install kprobes */ extern void *test_kprobes_addresses[]; @@ -19,6 +19,6 @@ extern void *test_kprobes_addresses[]; /* array of functions that return KPROBE_TEST_MAGIC */ extern long (*test_kprobes_functions[])(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* TEST_KPROBES_H */ -- cgit v1.2.3 From 8527bbb33936340525a3504a00932b2f8fd75754 Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Tue, 7 Oct 2025 10:38:31 +0300 Subject: ALSA: hda: cs35l41: Fix NULL pointer dereference in cs35l41_get_acpi_mute_state() The return value of acpi_evaluate_dsm() is dereferenced without checking for NULL, even though callers of this function usually check it. acpi_evaluate_dsm() may return NULL when acpi_evaluate_object() returns an acpi_status other than ACPI_SUCCESS, so add a check to prevent the crash. Found by Linux Verification Center (linuxtesting.org) with SVACE.
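The canonical calling pattern, sketched for illustration (hedged; the exact surrounding code is in the diff below, and acpi_evaluate_dsm() results are expected to be released with ACPI_FREE() once consumed):

    union acpi_object *obj;

    obj = acpi_evaluate_dsm(handle, &guid, 0, CS35L41_DSM_GET_MUTE, NULL);
    if (!obj)                       /* _DSM evaluation failed */
            return -EINVAL;

    mute = *obj->buffer.pointer;    /* dereference only after the NULL check */
    ACPI_FREE(obj);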
Fixes: 447106e92a0c ("ALSA: hda: cs35l41: Support mute notifications for CS35L41 HDA") Cc: stable@vger.kernel.org Signed-off-by: Denis Arefev Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/cs35l41_hda.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/hda/codecs/side-codecs/cs35l41_hda.c b/sound/hda/codecs/side-codecs/cs35l41_hda.c index c04208e685a0..c0f2a3ff77a1 100644 --- a/sound/hda/codecs/side-codecs/cs35l41_hda.c +++ b/sound/hda/codecs/side-codecs/cs35l41_hda.c @@ -1410,6 +1410,8 @@ static int cs35l41_get_acpi_mute_state(struct cs35l41_hda *cs35l41, acpi_handle if (cs35l41_dsm_supported(handle, CS35L41_DSM_GET_MUTE)) { ret = acpi_evaluate_dsm(handle, &guid, 0, CS35L41_DSM_GET_MUTE, NULL); + if (!ret) + return -EINVAL; mute = *ret->buffer.pointer; dev_dbg(cs35l41->dev, "CS35L41_DSM_GET_MUTE: %d\n", mute); } -- cgit v1.2.3 From 1cf11d80db5df805b538c942269e05a65bcaf5bc Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Thu, 9 Oct 2025 13:50:47 +0300 Subject: ALSA: hda: Fix missing pointer check in hda_component_manager_init function The __component_match_add function may assign the 'matchptr' pointer the value ERR_PTR(-ENOMEM), which will subsequently be dereferenced. The call stack leading to the error looks like this: hda_component_manager_init |-> component_match_add |-> component_match_add_release |-> __component_match_add ( ... ,**matchptr, ... ) |-> *matchptr = ERR_PTR(-ENOMEM); // assign |-> component_master_add_with_match( ... match) |-> component_match_realloc(match, match->num); // dereference Add IS_ERR() check to prevent the crash. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: ae7abe36e352 ("ALSA: hda/realtek: Add CS35L41 support for Thinkpad laptops") Cc: stable@vger.kernel.org Signed-off-by: Denis Arefev Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/hda_component.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/hda/codecs/side-codecs/hda_component.c b/sound/hda/codecs/side-codecs/hda_component.c index bcf47a301697..8a2a200600a7 100644 --- a/sound/hda/codecs/side-codecs/hda_component.c +++ b/sound/hda/codecs/side-codecs/hda_component.c @@ -174,6 +174,10 @@ int hda_component_manager_init(struct hda_codec *cdc, sm->match_str = match_str; sm->index = i; component_match_add(dev, &match, hda_comp_match_dev_name, sm); + if (IS_ERR(match)) { + codec_err(cdc, "Fail to add component %ld\n", PTR_ERR(match)); + return PTR_ERR(match); + } } ret = component_master_add_with_match(dev, ops, match); -- cgit v1.2.3 From 30b3211aa24161856134b2c2ea2ab1c6eb534b36 Mon Sep 17 00:00:00 2001 From: Stuart Hayhurst Date: Sat, 11 Oct 2025 00:28:29 +0100 Subject: ALSA: hda/intel: Add MSI X870E Tomahawk to denylist This motherboard uses USB audio instead, causing this driver to complain about "no codecs found!". Add it to the denylist to silence the warning. 
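For context, the denylist is a plain pci_device_id table consulted before the controller is set up; a hedged sketch of the matching logic (simplified, not the verbatim driver code):

    static const struct pci_device_id driver_denylist[] = {
            { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1462, 0xee59) }, /* MSI X870E Tomahawk WiFi */
            {}
    };

    static int azx_probe_sketch(struct pci_dev *pci)
    {
            if (pci_match_id(driver_denylist, pci)) {
                    dev_info(&pci->dev, "Skipping the device on the denylist\n");
                    return -ENODEV;
            }
            /* ... normal probe continues ... */
            return 0;
    }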
Signed-off-by: Stuart Hayhurst Cc: Signed-off-by: Takashi Iwai --- sound/hda/controllers/intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c index 48c52a207024..a19258c95886 100644 --- a/sound/hda/controllers/intel.c +++ b/sound/hda/controllers/intel.c @@ -2075,6 +2075,7 @@ static const struct pci_device_id driver_denylist[] = { { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1043, 0x874f) }, /* ASUS ROG Zenith II / Strix */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb59) }, /* MSI TRX40 Creator */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb60) }, /* MSI TRX40 */ + { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1462, 0xee59) }, /* MSI X870E Tomahawk WiFi */ {} }; -- cgit v1.2.3 From 77908b81766781dfcd086878aefc29f5db8dae6a Mon Sep 17 00:00:00 2001 From: Cryolitia PukNgae Date: Sat, 11 Oct 2025 17:51:18 +0800 Subject: ALSA: usb-audio: apply quirk for Huawei Technologies Co., Ltd. CM-Q3 There are several different hardware variants sold by Huawei under the same USB ID 12d1:3a07. The first one we found, having a volume control named "Headset Playback Volume", reports a min value -15360, and mutes only when it is set to -15360. It can be fixed simply with the quirk flag MIXER_PLAYBACK_MIN_MUTE, which we have already submitted previously.[1] The second one we found today, having a volume control named "PCM Playback Volume", reports its min -11520 and res 256, and will mute when less than -11008. Because of the already existing quirk flag, we can just set its min to -11264, and the new minimum value will still not be available to userspace, so that userspace's minimum will be the correct -11008. 1. https://lore.kernel.org/all/20250903-sound-v1-3-d4ca777b8512@uniontech.com/ Tested-by: Guoli An Signed-off-by: Cryolitia PukNgae Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 34bcbfd8b54e..ae412e651faf 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1189,6 +1189,13 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, cval->min = -14208; /* Mute under it */ } break; + case USB_ID(0x12d1, 0x3a07): /* Huawei Technologies Co., Ltd. CM-Q3 */ + if (!strcmp(kctl->id.name, "PCM Playback Volume")) { + usb_audio_info(chip, + "set volume quirk for Huawei Technologies Co., Ltd. CM-Q3\n"); + cval->min = -11264; /* Mute under it */ + } + break; } } -- cgit v1.2.3 From db74b04edce1bc86b9a5acc724c7ca06f427ab60 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 11 Oct 2025 12:59:53 +0200 Subject: drm/bridge: lt9211: Drop check for last nibble of version register There is now a new LT9211 rev. U5, which reports chip ID 0x18 0x01 0xe4. The previous LT9211 reported chip ID 0x18 0x01 0xe3, which is what the driver checks for right now. Since there is a possibility there will be yet another revision of the LT9211 in the future, drop the last version nibble check to allow all future revisions of the chip to work with this driver. This fix makes LT9211 rev. U5 work with this driver.
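Had future revisions still needed to be constrained, an alternative would have been to mask off only the low (revision) nibble instead of dropping the byte comparison entirely; a hypothetical sketch:

    /* Hypothetical: pin the upper nibble, accept any revision nibble. */
    if (chipid[0] != REG_CHIPID0_VALUE || chipid[1] != REG_CHIPID1_VALUE ||
        (chipid[2] & 0xf0) != (REG_CHIPID2_VALUE & 0xf0)) {
            dev_err(ctx->dev, "Unknown Chip ID: 0x%02x 0x%02x 0x%02x\n",
                    chipid[0], chipid[1], chipid[2]);
            return -EINVAL;
    }

The patch instead drops the third byte from the check altogether, trading a little strictness for compatibility with any future revision.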
Fixes: 8ce4129e3de4 ("drm/bridge: lt9211: Add Lontium LT9211 bridge driver") Signed-off-by: Marek Vasut Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20251011110017.12521-1-marek.vasut@mailbox.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/bridge/lontium-lt9211.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/lontium-lt9211.c b/drivers/gpu/drm/bridge/lontium-lt9211.c index 399fa7eebd49..03fc8fd10f20 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9211.c +++ b/drivers/gpu/drm/bridge/lontium-lt9211.c @@ -121,8 +121,7 @@ static int lt9211_read_chipid(struct lt9211 *ctx) } /* Test for known Chip ID. */ - if (chipid[0] != REG_CHIPID0_VALUE || chipid[1] != REG_CHIPID1_VALUE || - chipid[2] != REG_CHIPID2_VALUE) { + if (chipid[0] != REG_CHIPID0_VALUE || chipid[1] != REG_CHIPID1_VALUE) { dev_err(ctx->dev, "Unknown Chip ID: 0x%02x 0x%02x 0x%02x\n", chipid[0], chipid[1], chipid[2]); return -EINVAL; -- cgit v1.2.3 From d5d790ba1558dbb8d179054f514476e2ee970b8e Mon Sep 17 00:00:00 2001 From: Bhanu Seshu Kumar Valluri Date: Thu, 9 Oct 2025 11:00:09 +0530 Subject: net: usb: lan78xx: Fix lost EEPROM write timeout error(-ETIMEDOUT) in lan78xx_write_raw_eeprom The function lan78xx_write_raw_eeprom failed to properly propagate EEPROM write timeout errors (-ETIMEDOUT). In the timeout fallthrough path, it first attempted to restore the pin configuration for LED outputs and then returned only the status of that restore operation, discarding the original timeout error saved in ret. As a result, callers could mistakenly treat the EEPROM write operation as successful even though the EEPROM write had actually timed out, with no data or only partial data written. To fix this, handle errors in restoring the LED pin configuration separately. If the restore succeeds, return any prior EEPROM write timeout error saved in ret to the caller. Suggested-by: Oleksij Rempel Fixes: 8b1b2ca83b20 ("net: usb: lan78xx: Improve error handling in EEPROM and OTP operations") Cc: stable@vger.kernel.org Signed-off-by: Bhanu Seshu Kumar Valluri Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 42d35cc6b421..28195d9a8d6b 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1175,10 +1175,13 @@ static int lan78xx_write_raw_eeprom(struct lan78xx_net *dev, u32 offset, } write_raw_eeprom_done: - if (dev->chipid == ID_REV_CHIP_ID_7800_) - return lan78xx_write_reg(dev, HW_CFG, saved); - - return 0; + if (dev->chipid == ID_REV_CHIP_ID_7800_) { + int rc = lan78xx_write_reg(dev, HW_CFG, saved); + /* If USB fails, there is nothing to do */ + if (rc < 0) + return rc; + } + return ret; } static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset, -- cgit v1.2.3 From 3abc0e55ea1fa2250e52bc860e8f24b2b9a2093a Mon Sep 17 00:00:00 2001 From: Rex Lu Date: Thu, 9 Oct 2025 08:29:34 +0200 Subject: net: mtk: wed: add dma mask limitation and GFP_DMA32 for device with more than 4GB DRAM Limit tx/rx buffer addresses to the 32-bit address space for boards with more than 4GB of DRAM.
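The two halves of the pattern used below — capping the device's DMA mask and allocating page memory from the 32-bit zone to match — can be sketched in isolation as (illustrative fragment, taken out of its driver context):

    /* Tell the DMA layer the device can only address 32 bits... */
    ret = dma_set_mask_and_coherent(hw->dev, DMA_BIT_MASK(32));
    if (ret)
            return ret;

    /* ...and ask the page allocator for memory below 4GB to match. */
    page = __dev_alloc_page(GFP_KERNEL | GFP_DMA32);
    if (!page)
            return -ENOMEM;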
Fixes: 804775dfc2885 ("net: ethernet: mtk_eth_soc: add support for Wireless Ethernet Dispatch (WED)") Fixes: 6757d345dd7db ("net: ethernet: mtk_wed: introduce hw_rro support for MT7988") Tested-by: Daniel Pawlik Tested-by: Matteo Croce Signed-off-by: Rex Lu Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_wed.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 3dbb113b792c..1ed1f88dd7f8 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -677,7 +677,7 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) void *buf; int s; - page = __dev_alloc_page(GFP_KERNEL); + page = __dev_alloc_page(GFP_KERNEL | GFP_DMA32); if (!page) return -ENOMEM; @@ -800,7 +800,7 @@ mtk_wed_hwrro_buffer_alloc(struct mtk_wed_device *dev) struct page *page; int s; - page = __dev_alloc_page(GFP_KERNEL); + page = __dev_alloc_page(GFP_KERNEL | GFP_DMA32); if (!page) return -ENOMEM; @@ -2426,6 +2426,10 @@ mtk_wed_attach(struct mtk_wed_device *dev) dev->version = hw->version; dev->hw->pcie_base = mtk_wed_get_pcie_base(dev); + ret = dma_set_mask_and_coherent(hw->dev, DMA_BIT_MASK(32)); + if (ret) + goto out; + if (hw->eth->dma_dev == hw->eth->dev && of_dma_is_coherent(hw->eth->dev->of_node)) mtk_eth_set_dma_device(hw->eth, hw->dev); -- cgit v1.2.3 From 65946eac6d888d50ae527c4e5c237dbe5cc3a2f2 Mon Sep 17 00:00:00 2001 From: Yeounsu Moon Date: Fri, 10 Oct 2025 00:57:16 +0900 Subject: net: dlink: handle dma_map_single() failure properly There is no error handling for `dma_map_single()` failures. Add error handling by checking `dma_mapping_error()` and freeing the `skb` using `dev_kfree_skb()` (process context) when it fails. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yeounsu Moon Tested-on: D-Link DGE-550T Rev-A3 Suggested-by: Simon Horman Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/dlink/dl2k.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 1996d2e4e3e2..7077d705e471 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -508,25 +508,34 @@ static int alloc_list(struct net_device *dev) for (i = 0; i < RX_RING_SIZE; i++) { /* Allocated fixed size of skbuff */ struct sk_buff *skb; + dma_addr_t addr; skb = netdev_alloc_skb_ip_align(dev, np->rx_buf_sz); np->rx_skbuff[i] = skb; - if (!skb) { - free_list(dev); - return -ENOMEM; - } + if (!skb) + goto err_free_list; + + addr = dma_map_single(&np->pdev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pdev->dev, addr)) + goto err_kfree_skb; np->rx_ring[i].next_desc = cpu_to_le64(np->rx_ring_dma + ((i + 1) % RX_RING_SIZE) * sizeof(struct netdev_desc)); /* Rubicon now supports 40 bits of addressing space. 
*/ - np->rx_ring[i].fraginfo = - cpu_to_le64(dma_map_single(&np->pdev->dev, skb->data, - np->rx_buf_sz, DMA_FROM_DEVICE)); + np->rx_ring[i].fraginfo = cpu_to_le64(addr); np->rx_ring[i].fraginfo |= cpu_to_le64((u64)np->rx_buf_sz << 48); } return 0; + +err_kfree_skb: + dev_kfree_skb(np->rx_skbuff[i]); + np->rx_skbuff[i] = NULL; +err_free_list: + free_list(dev); + return -ENOMEM; } static void rio_hw_init(struct net_device *dev) -- cgit v1.2.3 From 68a052239fc4b351e961f698b824f7654a346091 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 Oct 2025 13:56:29 -0700 Subject: selftests: drv-net: update remaining Python init files Convert remaining __init__ files similar to what we did in commit b615879dbfea ("selftests: drv-net: make linters happy with our imports") Signed-off-by: Jakub Kicinski Signed-off-by: Stanislav Fomichev Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../selftests/drivers/net/hw/lib/py/__init__.py | 40 ++++++++++++++++------ .../selftests/drivers/net/lib/py/__init__.py | 4 +-- tools/testing/selftests/net/lib/py/__init__.py | 29 ++++++++++++++-- 3 files changed, 57 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index 0ceb297e7757..fb010a48a5a1 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -1,5 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 +""" +Driver test environment (hardware-only tests). +NetDrvEnv and NetDrvEpEnv are the main environment classes. +Former is for local host only tests, latter creates / connects +to a remote endpoint. See NIPA wiki for more information about +running and writing driver tests. 
+""" + import sys from pathlib import Path @@ -8,26 +16,36 @@ KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve() try: sys.path.append(KSFT_DIR.as_posix()) - from net.lib.py import * - from drivers.net.lib.py import * - # Import one by one to avoid pylint false positives + from net.lib.py import NetNS, NetNSEnter, NetdevSimDev from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ NlError, RtnlFamily, DevlinkFamily, PSPFamily from net.lib.py import CmdExitFailure - from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \ - rand_port, tool, wait_port_listen - from net.lib.py import fd_read_timeout + from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ + fd_read_timeout, ip, rand_port, wait_port_listen, wait_file from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none - from net.lib.py import NetNSEnter - from drivers.net.lib.py import GenerateTraffic + from drivers.net.lib.py import GenerateTraffic, Remote from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv + + __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", + "EthtoolFamily", "NetdevFamily", "NetshaperFamily", + "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", + "CmdExitFailure", + "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", + "fd_read_timeout", "ip", "rand_port", + "wait_port_listen", "wait_file", + "KsftSkipEx", "KsftFailEx", "KsftXfailEx", + "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", + "ksft_setup", + "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", + "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", + "ksft_not_none", "ksft_not_none", + "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote"] except ModuleNotFoundError as e: - ksft_pr("Failed importing `net` library from kernel sources") - ksft_pr(str(e)) - ktap_result(True, comment="SKIP") + print("Failed importing `net` library from kernel sources") + print(str(e)) sys.exit(4) diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index e6c070f32f51..b0c6300150fb 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -22,7 +22,7 @@ try: NlError, RtnlFamily, DevlinkFamily, PSPFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ - fd_read_timeout, ip, rand_port, tool, wait_port_listen, wait_file + fd_read_timeout, ip, rand_port, wait_port_listen, wait_file from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup @@ -34,7 +34,7 @@ try: "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", "CmdExitFailure", "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", - "fd_read_timeout", "ip", "rand_port", "tool", + "fd_read_timeout", "ip", "rand_port", "wait_port_listen", "wait_file", "KsftSkipEx", "KsftFailEx", "KsftXfailEx", "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 997b85cc216a..97b7cf2b20eb 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -1,9 +1,32 @@ # 
SPDX-License-Identifier: GPL-2.0 +""" +Python selftest helpers for netdev. +""" + from .consts import KSRC -from .ksft import * +from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \ + ksft_ne, ksft_true, ksft_not_none, ksft_in, ksft_not_in, ksft_is, \ + ksft_ge, ksft_gt, ksft_lt, ksft_raises, ksft_busy_wait, \ + ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit from .netns import NetNS, NetNSEnter -from .nsim import * -from .utils import * +from .nsim import NetdevSim, NetdevSimDev +from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \ + bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily + +__all__ = ["KSRC", + "KsftFailEx", "KsftSkipEx", "KsftXfailEx", "ksft_pr", "ksft_eq", + "ksft_ne", "ksft_true", "ksft_not_none", "ksft_in", "ksft_not_in", + "ksft_is", "ksft_ge", "ksft_gt", "ksft_lt", "ksft_raises", + "ksft_busy_wait", "ktap_result", "ksft_disruptive", "ksft_setup", + "ksft_run", "ksft_exit", + "NetNS", "NetNSEnter", + "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer", + "bpftool", "ip", "ethtool", "bpftrace", "rand_port", + "wait_port_listen", "wait_file", + "NetdevSim", "NetdevSimDev", + "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError", + "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily", + "RtnlAddrFamily"] -- cgit v1.2.3 From 0b4b77eff5f8cd9be062783a1c1e198d46d0a753 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 10 Oct 2025 16:18:59 +0200 Subject: doc: fix seg6_flowlabel path This sysctl is not per interface; it's global per netns. Fixes: 292ecd9f5a94 ("doc: move seg6_flowlabel to seg6-sysctl.rst") Reported-by: Philippe Guibert Signed-off-by: Nicolas Dichtel Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/networking/seg6-sysctl.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/networking/seg6-sysctl.rst b/Documentation/networking/seg6-sysctl.rst index 07c20e470baf..1b6af4779be1 100644 --- a/Documentation/networking/seg6-sysctl.rst +++ b/Documentation/networking/seg6-sysctl.rst @@ -25,6 +25,9 @@ seg6_require_hmac - INTEGER Default is 0. +/proc/sys/net/ipv6/seg6_* variables: +==================================== + seg6_flowlabel - INTEGER Controls the behaviour of computing the flowlabel of outer IPv6 header in case of SR T.encaps -- cgit v1.2.3 From 39dec6cd888bde4171bd4b8fcf45f73ab684404d Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 1 Oct 2025 21:09:00 +0200 Subject: smb: server: Use common error handling code in smb_direct_rdma_xmit() Add two jump targets so that a bit of exception handling can be better reused at the end of this function implementation. 
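The result follows the common kernel "unwind ladder" idiom: each failure point jumps to the label that releases exactly what has been acquired so far, so the cleanup code exists once. In the abstract (illustrative only, helper names hypothetical):

    int example(void)
    {
            int ret;

            ret = acquire_a();
            if (ret)
                    return ret;

            ret = acquire_b();
            if (ret)
                    goto release_a;

            ret = acquire_c();
            if (ret)
                    goto release_b;

            return 0;

    release_b:
            release_b_resources();
    release_a:
            release_a_resources();
            return ret;
    }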
Signed-off-by: Markus Elfring Reviewed-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index b3077766d6ec..a201c5871a77 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -1574,18 +1574,14 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, get_buf_page_count(desc_buf, desc_buf_len), msg->sg_list, SG_CHUNK_SIZE); if (ret) { - kfree(msg); ret = -ENOMEM; - goto out; + goto free_msg; } ret = get_sg_list(desc_buf, desc_buf_len, msg->sgt.sgl, msg->sgt.orig_nents); - if (ret < 0) { - sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); - kfree(msg); - goto out; - } + if (ret < 0) + goto free_table; ret = rdma_rw_ctx_init(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port, msg->sgt.sgl, @@ -1596,9 +1592,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); if (ret < 0) { pr_err("failed to init rdma_rw_ctx: %d\n", ret); - sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); - kfree(msg); - goto out; + goto free_table; } list_add_tail(&msg->list, &msg_list); @@ -1630,6 +1624,12 @@ out: atomic_add(credits_needed, &sc->rw_io.credits.count); wake_up(&sc->rw_io.credits.wait_queue); return ret; + +free_table: + sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); +free_msg: + kfree(msg); + goto out; } static int smb_direct_rdma_write(struct ksmbd_transport *t, -- cgit v1.2.3 From ef3e73a917ec7d080e0fb0e4015098a4fb0f1cff Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Fri, 10 Oct 2025 12:03:07 +0000 Subject: powerpc/pseries/msi: Fix NULL pointer dereference at irq domain teardown pseries_msi_ops_teardown() reads pci_dev* from msi_alloc_info_t. However, pseries_msi_ops_prepare() does not populate this structure, thus it is all zeros. Consequently, pseries_msi_ops_teardown() triggers a NULL pointer dereference crash. struct pci_dev is available in struct irq_domain. Read it there instead. Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/linuxppc-dev/878d7651-433a-46fe-a28b-1b7e893fcbe0@linux.ibm.com/ Tested-by: Venkat Rao Bagalkote Signed-off-by: Nam Cao Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20251010120307.3281720-1-namcao@linutronix.de --- arch/powerpc/platforms/pseries/msi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 825f9432e03d..a82aaa786e9e 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -443,8 +443,7 @@ static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev */ static void pseries_msi_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg) { - struct msi_desc *desc = arg->desc; - struct pci_dev *pdev = msi_desc_to_pci_dev(desc); + struct pci_dev *pdev = to_pci_dev(domain->dev); rtas_disable_msi(pdev); } -- cgit v1.2.3 From 2743cf75f7c92d2a0a4acabd7aef1c17d98fe123 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 8 Oct 2025 08:13:59 +0000 Subject: powerpc, ocxl: Fix extraction of struct xive_irq_data Commit cc0cc23babc9 ("powerpc/xive: Untangle xive from child interrupt controller drivers") changed xive_irq_data to be stashed to chip_data instead of handler_data. 
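For readers less familiar with genirq: each interrupt carries two independent per-IRQ pointers, chip_data and handler_data, each with its own accessors, so the code that stashes a pointer and the code that looks it up must agree on the slot. A hedged sketch of the mismatch being fixed:

    /* Producer side (XIVE core): stash the per-IRQ data in chip_data. */
    irq_set_chip_data(virq, xd);

    /* Consumer side: the accessor must name the same slot. */
    xd = irq_get_chip_data(virq);      /* matching slot: works         */
    xd = irq_get_handler_data(virq);   /* stale slot: now returns NULL */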
However, multiple places are still attempting to read xive_irq_data from handler_data and hit a NULL pointer dereference. Update them to read xive_irq_data from chip_data. Non-XIVE files which touch xive_irq_data seem quite strange to me, especially the ocxl driver. I think there ought to be an alternative platform-independent solution, instead of touching XIVE's data directly. Therefore, I think this whole thing should be cleaned up. But perhaps I just misunderstand something. In any case, this cleanup would not be trivial; for now, just get things working again. Fixes: cc0cc23babc9 ("powerpc/xive: Untangle xive from child interrupt controller drivers") Reported-by: Ritesh Harjani (IBM) Closes: https://lore.kernel.org/linuxppc-dev/68e48df8.170a0220.4b4b0.217d@mx.google.com/ Signed-off-by: Nam Cao Reviewed-by: Ganesh Goudar Acked-by: Andrew Donnellan # ocxl Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20251008081359.1382699-1-namcao@linutronix.de --- arch/powerpc/kvm/book3s_xive.c | 12 ++++-------- arch/powerpc/platforms/powernv/vas.c | 2 +- arch/powerpc/sysdev/xive/common.c | 2 +- drivers/misc/ocxl/afu_irq.c | 2 +- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 1302b5ac5672..89a1b8c21ab4 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -916,8 +916,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, * it fires once. */ if (single_escalation) { - struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(xc->esc_virq[prio]); xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); vcpu->arch.xive_esc_raddr = xd->eoi_page; @@ -1612,7 +1611,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, /* Grab info about irq */ state->pt_number = hw_irq; - state->pt_data = irq_data_get_irq_handler_data(host_data); + state->pt_data = irq_data_get_irq_chip_data(host_data); /* * Configure the IRQ to match the existing configuration of @@ -1787,8 +1786,7 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) */ void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq) { - struct irq_data *d = irq_get_irq_data(irq); - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(irq); /* * This slightly odd sequence gives the right result @@ -2827,9 +2825,7 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) i0, i1); } if (xc->esc_virq[i]) { - struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); - struct xive_irq_data *xd = - irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(xc->esc_virq[i]); u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); seq_printf(m, " ESC %d %c%c EOI @%llx", diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index b65256a63e87..9c9650319f3b 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -121,7 +121,7 @@ static int init_vas_instance(struct platform_device *pdev) return -EINVAL; } - xd = irq_get_handler_data(vinst->virq); + xd = irq_get_chip_data(vinst->virq); if (!xd) { pr_err("Inst%d: Invalid virq %d\n", vinst->vas_id, vinst->virq); diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 625361a15424..8d0123b0ae84 100644 --- a/arch/powerpc/sysdev/xive/common.c +++
b/arch/powerpc/sysdev/xive/common.c @@ -1580,7 +1580,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) cpu, irq); #endif raw_spin_lock(&desc->lock); - xd = irq_desc_get_handler_data(desc); + xd = irq_desc_get_chip_data(desc); /* * Clear saved_p to indicate that it's no longer pending diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c index 36f7379b8e2d..f6b821fc274c 100644 --- a/drivers/misc/ocxl/afu_irq.c +++ b/drivers/misc/ocxl/afu_irq.c @@ -203,7 +203,7 @@ u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id) mutex_lock(&ctx->irq_lock); irq = idr_find(&ctx->irq_idr, irq_id); if (irq) { - xd = irq_get_handler_data(irq->virq); + xd = irq_get_chip_data(irq->virq); addr = xd ? xd->trig_page : 0; } mutex_unlock(&ctx->irq_lock); -- cgit v1.2.3 From 0843ba458439f38efdc14aa359c14ad0127edb01 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 8 Oct 2025 08:59:34 +0530 Subject: powerpc/fadump: skip parameter area allocation when fadump is disabled Fadump allocates memory to pass additional kernel command-line arguments to the fadump kernel. However, this allocation is not needed when fadump is disabled. So avoid allocating memory for the additional parameter area in such cases. Fixes: f4892c68ecc1 ("powerpc/fadump: allocate memory for additional parameters early") Reviewed-by: Hari Bathini Signed-off-by: Sourabh Jain Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20251008032934.262683-1-sourabhjain@linux.ibm.com --- arch/powerpc/kernel/fadump.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 5782e743fd27..4ebc333dd786 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1747,6 +1747,9 @@ void __init fadump_setup_param_area(void) { phys_addr_t range_start, range_end; + if (!fw_dump.fadump_enabled) + return; + if (!fw_dump.param_area_supported || fw_dump.dump_active) return; -- cgit v1.2.3 From 12d724f2852d094d68dccaf5101e0ef89a971cde Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 9 Oct 2025 19:46:00 +0900 Subject: ata: libata-core: relax checks in ata_read_log_directory() Commit 6d4405b16d37 ("ata: libata-core: Cache the general purpose log directory") introduced caching of a device's general purpose log directory to avoid repeated access to this log page during device scan. This change also added a check on this log page to verify that the log page version is 0x0001 as mandated by the ACS specifications. It turns out that some devices do not bother reporting this version, instead reporting a version 0, resulting in error messages such as: ata6.00: Invalid log directory version 0x0000 and in the device being marked as not supporting the general purpose log directory log page. Since before commit 6d4405b16d37 the log page version check did not exist and things were still working correctly for these devices, relax the ata_read_log_directory() version check and only warn about the invalid log page version number without disabling access to the log directory page.
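The ata_dev_warn_once() helper added below leans on the kernel's print-once machinery; the underlying pattern is roughly this (a simplified sketch, not the actual pr_warn_once() expansion):

    /* Emit the warning on the first call only; later calls are no-ops. */
    #define my_warn_once(fmt, ...)                          \
    do {                                                    \
            static bool __warned;                           \
                                                            \
            if (!__warned) {                                \
                    __warned = true;                        \
                    pr_warn(fmt, ##__VA_ARGS__);            \
            }                                               \
    } while (0)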
Fixes: 6d4405b16d37 ("ata: libata-core: Cache the general purpose log directory") Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220635 Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 11 ++++------- include/linux/libata.h | 6 ++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index ff53f5f029b4..2a210719c4ce 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2174,13 +2174,10 @@ static int ata_read_log_directory(struct ata_device *dev) } version = get_unaligned_le16(&dev->gp_log_dir[0]); - if (version != 0x0001) { - ata_dev_err(dev, "Invalid log directory version 0x%04x\n", - version); - ata_clear_log_directory(dev); - dev->quirks |= ATA_QUIRK_NO_LOG_DIR; - return -EINVAL; - } + if (version != 0x0001) + ata_dev_warn_once(dev, + "Invalid log directory version 0x%04x\n", + version); return 0; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 21de0935775d..7a98de1cc995 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1594,6 +1594,12 @@ do { \ #define ata_dev_dbg(dev, fmt, ...) \ ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__) +#define ata_dev_warn_once(dev, fmt, ...) \ + pr_warn_once("ata%u.%02u: " fmt, \ + (dev)->link->ap->print_id, \ + (dev)->link->pmp + (dev)->devno, \ + ##__VA_ARGS__) + static inline void ata_print_version_once(const struct device *dev, const char *version) { -- cgit v1.2.3 From 5ec6f9434225e18496a393f920b03eb46d67d71d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 13 Oct 2025 09:26:49 +0200 Subject: ALSA: hda/realtek: Add quirk entry for HP ZBook 17 G6 HP ZBook 17 G6 with SSID 103c:860c requires a workaround similar to the one for its 15-inch model in order to make the speaker and mute LED work. Add the corresponding quirk entry to address it. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220372 Cc: Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 214eb9df6ef8..6b9e5c091bf3 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6397,6 +6397,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x854a, "HP EliteBook 830 G6", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x85c6, "HP Pavilion x360 Convertible 14-dy1xxx", ALC295_FIXUP_HP_MUTE_LED_COEFBIT11), SND_PCI_QUIRK(0x103c, 0x85de, "HP Envy x360 13-ar0xxx", ALC285_FIXUP_HP_ENVY_X360), + SND_PCI_QUIRK(0x103c, 0x860c, "HP ZBook 17 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), -- cgit v1.2.3 From 66233e583d1e00b1742d1ba36ae31568109ba6bd Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Mon, 13 Oct 2025 12:35:55 +0800 Subject: ALSA: hda/tas2781: Set tas2781_hda::tasdevice_priv::chip_id as TAS5825 in case of tas5825 The software reset for TAS5825 is different from other chips: the reset register is set to 0x11 instead of 0x1 in tasdevice_reset(). So set tas2781_hda::tasdevice_priv::chip_id to TAS5825 so that tasdevice_reset() works correctly.
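A hedged sketch of the chip-dependent dispatch this relies on (the 0x11 vs 0x1 values come from the commit text; the helper, struct, and register names here are placeholders):

    /* Illustrative only: 'chip_write' and 'REG_SWRESET' are hypothetical. */
    static void do_soft_reset(struct chip *c)
    {
            /* TAS5825 takes 0x11 as its software-reset value, others 0x1. */
            unsigned int val = (c->chip_id == TAS5825) ? 0x11 : 0x1;

            chip_write(c, REG_SWRESET, val);
    }

Without chip_id set, the device would take the default branch and receive the wrong reset value.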
Fixes: 7ceb69ca82b1 ("ASoC: tas2781: Add tas2118, tas2x20, tas5825 support") Signed-off-by: Shenghao Ding Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/tas2781_hda_i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index a126f04c3ed7..0357401a6023 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c @@ -669,6 +669,7 @@ static int tas2781_hda_i2c_probe(struct i2c_client *clt) */ device_name = "TXNW5825"; hda_priv->hda_chip_id = HDA_TAS5825; + tas_hda->priv->chip_id = TAS5825; } else { return -ENODEV; } -- cgit v1.2.3 From 6079165e6e027c03e06556ff3df0ed03a34d68f0 Mon Sep 17 00:00:00 2001 From: Le Qi Date: Thu, 9 Oct 2025 17:06:18 +0800 Subject: ASoC: dt-bindings: qcom,sm8250: Add QCS615 sound card Add bindings for the QCS615 sound card, which looks fully compatible with the existing SM8250. Signed-off-by: Le Qi Acked-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20251009090619.1097388-2-le.qi@oss.qualcomm.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/qcom,sm8250.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml b/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml index 8ac91625dce5..b49a920af704 100644 --- a/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml @@ -33,6 +33,7 @@ properties: - qcom,apq8096-sndcard - qcom,glymur-sndcard - qcom,qcm6490-idp-sndcard + - qcom,qcs615-sndcard - qcom,qcs6490-rb3gen2-sndcard - qcom,qcs8275-sndcard - qcom,qcs9075-sndcard -- cgit v1.2.3 From dee4ef0ebe4dee655657ead30892aeca16462823 Mon Sep 17 00:00:00 2001 From: Le Qi Date: Thu, 9 Oct 2025 17:06:19 +0800 Subject: ASoC: qcom: sc8280xp: Add support for QCS615 Add a compatible for the sound card on QCS615 boards. Signed-off-by: Le Qi Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20251009090619.1097388-3-le.qi@oss.qualcomm.com Signed-off-by: Mark Brown --- sound/soc/qcom/sc8280xp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c index 78e327bc2f07..187f37ffe328 100644 --- a/sound/soc/qcom/sc8280xp.c +++ b/sound/soc/qcom/sc8280xp.c @@ -192,6 +192,7 @@ static int sc8280xp_platform_probe(struct platform_device *pdev) static const struct of_device_id snd_sc8280xp_dt_match[] = { {.compatible = "qcom,qcm6490-idp-sndcard", "qcm6490"}, + {.compatible = "qcom,qcs615-sndcard", "qcs615"}, {.compatible = "qcom,qcs6490-rb3gen2-sndcard", "qcs6490"}, {.compatible = "qcom,qcs8275-sndcard", "qcs8300"}, {.compatible = "qcom,qcs9075-sndcard", "sa8775p"}, -- cgit v1.2.3 From 53a3c6e222836a23e8e0693395584aefc456dca6 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Thu, 2 Oct 2025 15:29:24 +0800 Subject: ASoC: tas2781: Support more newly released tas58xx amplifiers in the driver TAS5802/TAS5815/TAS5828 have an on-chip DSP without current/voltage feedback.
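The driver gates calibration support on a single ordered comparison, which works because enum audio_device lists every DSP-equipped TAS58xx part after the TAS2xxx parts that have current/voltage sense; sketched (illustrative, names shortened):

    /* Enum order encodes the family split; TAS5802 is the first TAS58xx. */
    enum audio_device_sketch {
            TAS2563_SK, TAS2781_SK,                 /* I/V feedback    */
            TAS5802_SK, TAS5815_SK, TAS5825_SK,     /* no I/V feedback */
            TAS5827_SK, TAS5828_SK,
    };

    static bool needs_calibration(enum audio_device_sketch id)
    {
            return id < TAS5802_SK;  /* only I/V-sense parts are calibrated */
    }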
Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20251002072925.26242-1-baojun.xu@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781.h | 3 +++ sound/soc/codecs/tas2781-i2c.c | 21 +++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index ddd997ac3216..0fbcdb15c74b 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -120,8 +120,11 @@ enum audio_device { TAS2570, TAS2572, TAS2781, + TAS5802, + TAS5815, TAS5825, TAS5827, + TAS5828, TAS_OTHERS, }; diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 1539b70881d1..ba880b5de7e8 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -108,8 +108,11 @@ static const struct i2c_device_id tasdevice_id[] = { { "tas2570", TAS2570 }, { "tas2572", TAS2572 }, { "tas2781", TAS2781 }, + { "tas5802", TAS5802 }, + { "tas5815", TAS5815 }, { "tas5825", TAS5825 }, { "tas5827", TAS5827 }, + { "tas5828", TAS5828 }, {} }; MODULE_DEVICE_TABLE(i2c, tasdevice_id); @@ -124,8 +127,11 @@ static const struct of_device_id tasdevice_of_match[] = { { .compatible = "ti,tas2570" }, { .compatible = "ti,tas2572" }, { .compatible = "ti,tas2781" }, + { .compatible = "ti,tas5802" }, + { .compatible = "ti,tas5815" }, { .compatible = "ti,tas5825" }, { .compatible = "ti,tas5827" }, + { .compatible = "ti,tas5828" }, {}, }; MODULE_DEVICE_TABLE(of, tasdevice_of_match); @@ -1665,8 +1671,10 @@ static void tasdevice_fw_ready(const struct firmware *fmw, } tas_priv->fw_state = TASDEVICE_DSP_FW_ALL_OK; - /* There is no calibration required for TAS5825/TAS5827. */ - if (tas_priv->chip_id < TAS5825) { + /* There is no calibration required for + * TAS5802/TAS5815/TAS5825/TAS5827/TAS5828. + */ + if (tas_priv->chip_id < TAS5802) { ret = tasdevice_create_cali_ctrls(tas_priv); if (ret) { dev_err(tas_priv->dev, "cali controls error\n"); @@ -1720,8 +1728,11 @@ out: switch (tas_priv->chip_id) { case TAS2563: case TAS2781: + case TAS5802: + case TAS5815: case TAS5825: case TAS5827: + case TAS5828: /* If DSP FW fail, DSP kcontrol won't be created. */ tasdevice_dsp_remove(tas_priv); } @@ -1882,8 +1893,11 @@ static int tasdevice_codec_probe(struct snd_soc_component *codec) p = (struct snd_kcontrol_new *)tas2781_snd_controls; size = ARRAY_SIZE(tas2781_snd_controls); break; + case TAS5802: + case TAS5815: case TAS5825: case TAS5827: + case TAS5828: p = (struct snd_kcontrol_new *)tas5825_snd_controls; size = ARRAY_SIZE(tas5825_snd_controls); break; @@ -2054,8 +2068,11 @@ static const struct acpi_device_id tasdevice_acpi_match[] = { { "TXNW2570", TAS2570 }, { "TXNW2572", TAS2572 }, { "TXNW2781", TAS2781 }, + { "TXNW5802", TAS5802 }, + { "TXNW5815", TAS5815 }, { "TXNW5825", TAS5825 }, { "TXNW5827", TAS5827 }, + { "TXNW5828", TAS5828 }, {}, }; -- cgit v1.2.3 From 7e6cfa3e94cf8278ee3170dc0b81fc6db1287e28 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Thu, 2 Oct 2025 15:29:25 +0800 Subject: ASoC: tas2781: Update ti,tas2781.yaml for adding tas58xx Update ti,tas2781.yaml for adding TAS5802/TAS5815/TAS5828. 
Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20251002072925.26242-2-baojun.xu@ti.com Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/ti,tas2781.yaml | 43 +++++++++++++++++++--- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/ti,tas2781.yaml b/Documentation/devicetree/bindings/sound/ti,tas2781.yaml index bd00afa47d62..7f84f506013c 100644 --- a/Documentation/devicetree/bindings/sound/ti,tas2781.yaml +++ b/Documentation/devicetree/bindings/sound/ti,tas2781.yaml @@ -24,10 +24,10 @@ description: | Instruments Smart Amp speaker protection algorithm. The integrated speaker voltage and current sense provides for real time monitoring of loudspeaker behavior. - The TAS5825/TAS5827 is a stereo, digital input Class-D audio - amplifier optimized for efficiently driving high peak power into - small loudspeakers. An integrated on-chip DSP supports Texas - Instruments Smart Amp speaker protection algorithm. + The TAS5802/TAS5815/TAS5825/TAS5827/TAS5828 is a stereo, digital input + Class-D audio amplifier optimized for efficiently driving high peak + power into small loudspeakers. An integrated on-chip DSP supports + Texas Instruments Smart Amp speaker protection algorithm. Specifications about the audio amplifier can be found at: https://www.ti.com/lit/gpn/tas2120 @@ -35,8 +35,10 @@ description: | https://www.ti.com/lit/gpn/tas2563 https://www.ti.com/lit/gpn/tas2572 https://www.ti.com/lit/gpn/tas2781 + https://www.ti.com/lit/gpn/tas5815 https://www.ti.com/lit/gpn/tas5825m https://www.ti.com/lit/gpn/tas5827 + https://www.ti.com/lit/gpn/tas5828m properties: compatible: @@ -65,11 +67,21 @@ properties: Protection and Audio Processing, 16/20/24/32bit stereo I2S or multichannel TDM. + ti,tas5802: 22-W, Inductor-Less, Digital Input, Closed-Loop Class-D + Audio Amplifier with 96-Khz Extended Processing and Low Idle Power + Dissipation. + + ti,tas5815: 30-W, Digital Input, Stereo, Closed-loop Class-D Audio + Amplifier with 96 kHz Enhanced Processing + ti,tas5825: 38-W Stereo, Inductor-Less, Digital Input, Closed-Loop 4.5V to 26.4V Class-D Audio Amplifier with 192-kHz Extended Audio Processing. - ti,tas5827: 47-W Stereo, Digital Input, High Efficiency Closed-Loop Class-D - Amplifier with Class-H Algorithm + ti,tas5827: 47-W Stereo, Digital Input, High Efficiency Closed-Loop + Class-D Amplifier with Class-H Algorithm + + ti,tas5828: 50-W Stereo, Digital Input, High Efficiency Closed-Loop + Class-D Amplifier with Hybrid-Pro Algorithm oneOf: - items: - enum: @@ -80,8 +92,11 @@ properties: - ti,tas2563 - ti,tas2570 - ti,tas2572 + - ti,tas5802 + - ti,tas5815 - ti,tas5825 - ti,tas5827 + - ti,tas5828 - const: ti,tas2781 - enum: - ti,tas2781 @@ -177,12 +192,28 @@ allOf: minimum: 0x38 maximum: 0x3f + - if: + properties: + compatible: + contains: + enum: + - ti,tas5802 + - ti,tas5815 + then: + properties: + reg: + maxItems: 4 + items: + minimum: 0x54 + maximum: 0x57 + - if: properties: compatible: contains: enum: - ti,tas5827 + - ti,tas5828 then: properties: reg: -- cgit v1.2.3 From 6370a996f308ea3276030769b7482b346e7cc7c1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 9 Oct 2025 19:03:13 +0200 Subject: ASoC: codecs: Fix gain setting ranges for Renesas IDT821034 codec The gain ranges specified in Renesas IDT821034 codec documentation are [-3dB;+13dB] in the transmit path (ADC) and [-13dB;+3dB] in the receive path (DAC). 
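The raw register endpoints used in the fix follow directly from the linear conversion raw = 10^(dB/20) * ref, with ref = 1820 as the 0 dB point in the transmit path and ref = 2506 in the receive path; checking the new limits:

    10^(-3/20)  * 1820 ~= 1288   (transmit min,  -3 dB)
    10^(13/20)  * 1820 ~= 8130   (transmit max, +13 dB)
    10^(-13/20) * 2506 ~=  561   (receive min,  -13 dB)
    10^(3/20)   * 2506 ~= 3540   (receive max,   +3 dB)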
Although the registers allow programming values outside those ranges, the signal S/N and distortion are only guaranteed in the specified ranges. Set the ranges to the specified ones. Fixes: e51166990e81 ("ASoC: codecs: Add support for the Renesas IDT821034 codec") Signed-off-by: Christophe Leroy Link: https://patch.msgid.link/2bd547194f3398e6182f770d7d6be711c702b4b2.1760029099.git.christophe.leroy@csgroup.eu Signed-off-by: Mark Brown --- sound/soc/codecs/idt821034.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/idt821034.c b/sound/soc/codecs/idt821034.c index a03d4e5e7d14..cab2f2eecdfb 100644 --- a/sound/soc/codecs/idt821034.c +++ b/sound/soc/codecs/idt821034.c @@ -548,14 +548,14 @@ end: return ret; } -static const DECLARE_TLV_DB_LINEAR(idt821034_gain_in, -6520, 1306); -#define IDT821034_GAIN_IN_MIN_RAW 1 /* -65.20 dB -> 10^(-65.2/20.0) * 1820 = 1 */ -#define IDT821034_GAIN_IN_MAX_RAW 8191 /* 13.06 dB -> 10^(13.06/20.0) * 1820 = 8191 */ +static const DECLARE_TLV_DB_LINEAR(idt821034_gain_in, -300, 1300); +#define IDT821034_GAIN_IN_MIN_RAW 1288 /* -3.0 dB -> 10^(-3.0/20.0) * 1820 = 1288 */ +#define IDT821034_GAIN_IN_MAX_RAW 8130 /* 13.0 dB -> 10^(13.0/20.0) * 1820 = 8130 */ #define IDT821034_GAIN_IN_INIT_RAW 1820 /* 0dB -> 10^(0/20) * 1820 = 1820 */ -static const DECLARE_TLV_DB_LINEAR(idt821034_gain_out, -6798, 1029); -#define IDT821034_GAIN_OUT_MIN_RAW 1 /* -67.98 dB -> 10^(-67.98/20.0) * 2506 = 1*/ -#define IDT821034_GAIN_OUT_MAX_RAW 8191 /* 10.29 dB -> 10^(10.29/20.0) * 2506 = 8191 */ +static const DECLARE_TLV_DB_LINEAR(idt821034_gain_out, -1300, 300); +#define IDT821034_GAIN_OUT_MIN_RAW 561 /* -13.0 dB -> 10^(-13.0/20.0) * 2506 = 561 */ +#define IDT821034_GAIN_OUT_MAX_RAW 3540 /* 3.0 dB -> 10^(3.0/20.0) * 2506 = 3540 */ #define IDT821034_GAIN_OUT_INIT_RAW 2506 /* 0dB -> 10^(0/20) * 2506 = 2506 */ static const struct snd_kcontrol_new idt821034_controls[] = { -- cgit v1.2.3 From 7e8242405b94ceac6db820de7d4fd9318cbc1219 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Wed, 1 Oct 2025 08:08:03 +0200 Subject: rpmb: move rpmb_frame struct and constants to common header Move struct rpmb_frame and RPMB operation constants from MMC block driver to include/linux/rpmb.h for reuse across different RPMB implementations (UFS, NVMe, etc.). Signed-off-by: Bean Huo Reviewed-by: Avri Altman Acked-by: Jens Wiklander Reviewed-by: Bart Van Assche Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 42 ------------------------------------------ include/linux/rpmb.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 42 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 9399bf6c766a..c0ffe0817fd4 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -79,48 +79,6 @@ MODULE_ALIAS("mmc:block"); #define MMC_EXTRACT_INDEX_FROM_ARG(x) ((x & 0x00FF0000) >> 16) #define MMC_EXTRACT_VALUE_FROM_ARG(x) ((x & 0x0000FF00) >> 8) -/** - * struct rpmb_frame - rpmb frame as defined by eMMC 5.1 (JESD84-B51) - * - * @stuff : stuff bytes - * @key_mac : The authentication key or the message authentication - * code (MAC) depending on the request/response type. - * The MAC will be delivered in the last (or the only) - * block of data. - * @data : Data to be written or read by signed access. - * @nonce : Random number generated by the host for the requests - * and copied to the response by the RPMB engine.
- * @write_counter: Counter value for the total amount of the successful - * authenticated data write requests made by the host. - * @addr : Address of the data to be programmed to or read - * from the RPMB. Address is the serial number of - * the accessed block (half sector 256B). - * @block_count : Number of blocks (half sectors, 256B) requested to be - * read/programmed. - * @result : Includes information about the status of the write counter - * (valid, expired) and result of the access made to the RPMB. - * @req_resp : Defines the type of request and response to/from the memory. - * - * The stuff bytes and big-endian properties are modeled to fit to the spec. - */ -struct rpmb_frame { - u8 stuff[196]; - u8 key_mac[32]; - u8 data[256]; - u8 nonce[16]; - __be32 write_counter; - __be16 addr; - __be16 block_count; - __be16 result; - __be16 req_resp; -} __packed; - -#define RPMB_PROGRAM_KEY 0x1 /* Program RPMB Authentication Key */ -#define RPMB_GET_WRITE_COUNTER 0x2 /* Read RPMB write counter */ -#define RPMB_WRITE_DATA 0x3 /* Write data to RPMB partition */ -#define RPMB_READ_DATA 0x4 /* Read data from RPMB partition */ -#define RPMB_RESULT_READ 0x5 /* Read result request (Internal) */ - #define RPMB_FRAME_SIZE sizeof(struct rpmb_frame) #define CHECK_SIZE_NEQ(val) ((val) != sizeof(struct rpmb_frame)) #define CHECK_SIZE_ALIGNED(val) IS_ALIGNED((val), sizeof(struct rpmb_frame)) diff --git a/include/linux/rpmb.h b/include/linux/rpmb.h index cccda73eea4d..ed3f8e431eff 100644 --- a/include/linux/rpmb.h +++ b/include/linux/rpmb.h @@ -61,6 +61,50 @@ struct rpmb_dev { #define to_rpmb_dev(x) container_of((x), struct rpmb_dev, dev) +/** + * struct rpmb_frame - RPMB frame structure for authenticated access + * + * @stuff : stuff bytes, a padding/reserved area of 196 bytes at the + * beginning of the RPMB frame. They don’t carry meaningful + * data but are required to make the frame exactly 512 bytes. + * @key_mac : The authentication key or the message authentication + * code (MAC) depending on the request/response type. + * The MAC will be delivered in the last (or the only) + * block of data. + * @data : Data to be written or read by signed access. + * @nonce : Random number generated by the host for the requests + * and copied to the response by the RPMB engine. + * @write_counter: Counter value for the total amount of the successful + * authenticated data write requests made by the host. + * @addr : Address of the data to be programmed to or read + * from the RPMB. Address is the serial number of + * the accessed block (half sector 256B). + * @block_count : Number of blocks (half sectors, 256B) requested to be + * read/programmed. + * @result : Includes information about the status of the write counter + * (valid, expired) and result of the access made to the RPMB. + * @req_resp : Defines the type of request and response to/from the memory. + * + * The stuff bytes and big-endian properties are modeled to fit to the spec. 
+ */ +struct rpmb_frame { + u8 stuff[196]; + u8 key_mac[32]; + u8 data[256]; + u8 nonce[16]; + __be32 write_counter; + __be16 addr; + __be16 block_count; + __be16 result; + __be16 req_resp; +}; + +#define RPMB_PROGRAM_KEY 0x1 /* Program RPMB Authentication Key */ +#define RPMB_GET_WRITE_COUNTER 0x2 /* Read RPMB write counter */ +#define RPMB_WRITE_DATA 0x3 /* Write data to RPMB partition */ +#define RPMB_READ_DATA 0x4 /* Read data from RPMB partition */ +#define RPMB_RESULT_READ 0x5 /* Read result request (Internal) */ + #if IS_ENABLED(CONFIG_RPMB) struct rpmb_dev *rpmb_dev_get(struct rpmb_dev *rdev); void rpmb_dev_put(struct rpmb_dev *rdev); -- cgit v1.2.3 From 6e54919cb541fdf1063b16f3254c28d01bc9e5ff Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 3 Oct 2025 21:03:23 +0300 Subject: ASoC: nau8821: Cancel jdet_work before handling jack ejection The microphone detection work scheduled by a prior jack insertion interrupt may still be in a pending state or under execution when a jack ejection interrupt has been fired. This might lead to a race condition or nau8821_jdet_work() completing after nau8821_eject_jack(), which will override the currently disconnected state of the jack and incorrectly report the headphone or the headset as being connected. Cancel any pending jdet_work or wait for its execution to finish before attempting to handle the ejection interrupt. Proceed similarly before launching the eject handler as a consequence of detecting an invalid insert interrupt. Fixes: aab1ad11d69f ("ASoC: nau8821: new driver") Signed-off-by: Cristian Ciocaltea Link: https://patch.msgid.link/20251003-nau8821-jdet-fixes-v1-1-f7b0e2543f09@collabora.com Signed-off-by: Mark Brown --- sound/soc/codecs/nau8821.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/nau8821.c b/sound/soc/codecs/nau8821.c index edb95f869a4a..fed5d51fa5c3 100644 --- a/sound/soc/codecs/nau8821.c +++ b/sound/soc/codecs/nau8821.c @@ -1185,6 +1185,7 @@ static irqreturn_t nau8821_interrupt(int irq, void *data) if ((active_irq & NAU8821_JACK_EJECT_IRQ_MASK) == NAU8821_JACK_EJECT_DETECTED) { + cancel_work_sync(&nau8821->jdet_work); regmap_update_bits(regmap, NAU8821_R71_ANALOG_ADC_1, NAU8821_MICDET_MASK, NAU8821_MICDET_DIS); nau8821_eject_jack(nau8821); @@ -1199,11 +1200,11 @@ static irqreturn_t nau8821_interrupt(int irq, void *data) clear_irq = NAU8821_KEY_RELEASE_IRQ; } else if ((active_irq & NAU8821_JACK_INSERT_IRQ_MASK) == NAU8821_JACK_INSERT_DETECTED) { + cancel_work_sync(&nau8821->jdet_work); regmap_update_bits(regmap, NAU8821_R71_ANALOG_ADC_1, NAU8821_MICDET_MASK, NAU8821_MICDET_EN); if (nau8821_is_jack_inserted(regmap)) { /* detect microphone and jack type */ - cancel_work_sync(&nau8821->jdet_work); schedule_work(&nau8821->jdet_work); /* Turn off insertion interruption at manual mode */ regmap_update_bits(regmap, -- cgit v1.2.3 From 9273aa85b35cc02d0953a1ba3b7bd694e5a2c10e Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 3 Oct 2025 21:03:24 +0300 Subject: ASoC: nau8821: Generalize helper to clear IRQ status Instead of adding yet another utility function for dealing with the interrupt clearing register, generalize nau8821_int_status_clear_all() by renaming it to nau8821_irq_status_clear(), whilst introducing a second parameter to allow restricting the operation scope to a single interrupt instead of the whole range of active IRQs. While at it, also fix a spelling typo in the comment block.
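For reference, the generalized helper below ends up supporting two call styles; a short sketch of the intended usage (driver context assumed, not compilable standalone):

/* active_irq == 0: walk and clear every latched status bit in turn */
nau8821_irq_status_clear(regmap, 0);
/* active_irq != 0: clear only that specific interrupt status */
nau8821_irq_status_clear(regmap, NAU8821_KEY_RELEASE_IRQ);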
Note this is mainly a prerequisite for subsequent patches aiming to address some deficiencies in the implementation of the interrupt handler. Thus the presence of the Fixes tag below is intentional, to facilitate backporting. Fixes: aab1ad11d69f ("ASoC: nau8821: new driver") Signed-off-by: Cristian Ciocaltea Link: https://patch.msgid.link/20251003-nau8821-jdet-fixes-v1-2-f7b0e2543f09@collabora.com Signed-off-by: Mark Brown --- sound/soc/codecs/nau8821.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/nau8821.c b/sound/soc/codecs/nau8821.c index fed5d51fa5c3..cefce9789312 100644 --- a/sound/soc/codecs/nau8821.c +++ b/sound/soc/codecs/nau8821.c @@ -1021,12 +1021,17 @@ static bool nau8821_is_jack_inserted(struct regmap *regmap) return active_high == is_high; } -static void nau8821_int_status_clear_all(struct regmap *regmap) +static void nau8821_irq_status_clear(struct regmap *regmap, int active_irq) { - int active_irq, clear_irq, i; + int clear_irq, i; - /* Reset the intrruption status from rightmost bit if the corres- - * ponding irq event occurs. + if (active_irq) { + regmap_write(regmap, NAU8821_R11_INT_CLR_KEY_STATUS, active_irq); + return; + } + + /* Reset the interruption status from rightmost bit if the + * corresponding irq event occurs. */ regmap_read(regmap, NAU8821_R10_IRQ_STATUS, &active_irq); for (i = 0; i < NAU8821_REG_DATA_LEN; i++) { @@ -1053,7 +1058,7 @@ static void nau8821_eject_jack(struct nau8821 *nau8821) snd_soc_dapm_sync(dapm); /* Clear all interruption status */ - nau8821_int_status_clear_all(regmap); + nau8821_irq_status_clear(regmap, 0); /* Enable the insertion interruption, disable the ejection inter- * ruption, and then bypass de-bounce circuit. @@ -1522,7 +1527,7 @@ static int nau8821_resume_setup(struct nau8821 *nau8821) nau8821_configure_sysclk(nau8821, NAU8821_CLK_DIS, 0); if (nau8821->irq) { /* Clear all interruption status */ - nau8821_int_status_clear_all(regmap); + nau8821_irq_status_clear(regmap, 0); /* Enable both insertion and ejection interruptions, and then * bypass de-bounce circuit. -- cgit v1.2.3 From a698679fe8b0fec41d1fb9547a53127a85c1be92 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 3 Oct 2025 21:03:25 +0300 Subject: ASoC: nau8821: Consistently clear interrupts before unmasking The interrupt handler attempts to perform some IRQ status clear operations *after* rather than *before* unmasking and enabling interrupts. This is a rather fragile approach since it may generally lead to missing IRQ requests or causing spurious interrupts. Make use of the nau8821_irq_status_clear() helper instead of manipulating the related register directly and ensure any interrupt clearing is performed *after* the target interrupts are disabled/masked and *before* proceeding with additional interrupt unmasking/enablement operations. This also implicitly drops the redundant clear operation of the ejection IRQ in the interrupt handler, since nau8821_eject_jack() has already been responsible for clearing all active interrupts.
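The enforced ordering can be condensed into a three-step pattern (a sketch built from the driver's own helpers as used in the diff below, shown here for the insertion IRQ only; driver context assumed):

/* 1) mask/disable the IRQ so no new event can latch mid-update */
regmap_update_bits(regmap, NAU8821_R0F_INTERRUPT_MASK,
		   NAU8821_IRQ_INSERT_EN, NAU8821_IRQ_INSERT_EN);
/* 2) clear anything already latched */
nau8821_irq_status_clear(regmap, NAU8821_JACK_INSERT_DETECTED);
/* 3) only then unmask/enable again */
regmap_update_bits(regmap, NAU8821_R0F_INTERRUPT_MASK,
		   NAU8821_IRQ_INSERT_EN, 0);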
Fixes: aab1ad11d69f ("ASoC: nau8821: new driver") Fixes: 2551b6e89936 ("ASoC: nau8821: Add headset button detection") Signed-off-by: Cristian Ciocaltea Link: https://patch.msgid.link/20251003-nau8821-jdet-fixes-v1-3-f7b0e2543f09@collabora.com Signed-off-by: Mark Brown --- sound/soc/codecs/nau8821.c | 58 ++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/sound/soc/codecs/nau8821.c b/sound/soc/codecs/nau8821.c index cefce9789312..56e5a0d80782 100644 --- a/sound/soc/codecs/nau8821.c +++ b/sound/soc/codecs/nau8821.c @@ -1057,20 +1057,24 @@ static void nau8821_eject_jack(struct nau8821 *nau8821) snd_soc_component_disable_pin(component, "MICBIAS"); snd_soc_dapm_sync(dapm); + /* Disable & mask both insertion & ejection IRQs */ + regmap_update_bits(regmap, NAU8821_R12_INTERRUPT_DIS_CTRL, + NAU8821_IRQ_INSERT_DIS | NAU8821_IRQ_EJECT_DIS, + NAU8821_IRQ_INSERT_DIS | NAU8821_IRQ_EJECT_DIS); + regmap_update_bits(regmap, NAU8821_R0F_INTERRUPT_MASK, + NAU8821_IRQ_INSERT_EN | NAU8821_IRQ_EJECT_EN, + NAU8821_IRQ_INSERT_EN | NAU8821_IRQ_EJECT_EN); + /* Clear all interruption status */ nau8821_irq_status_clear(regmap, 0); - /* Enable the insertion interruption, disable the ejection inter- - * ruption, and then bypass de-bounce circuit. - */ + /* Enable & unmask the insertion IRQ */ regmap_update_bits(regmap, NAU8821_R12_INTERRUPT_DIS_CTRL, - NAU8821_IRQ_EJECT_DIS | NAU8821_IRQ_INSERT_DIS, - NAU8821_IRQ_EJECT_DIS); - /* Mask unneeded IRQs: 1 - disable, 0 - enable */ + NAU8821_IRQ_INSERT_DIS, 0); regmap_update_bits(regmap, NAU8821_R0F_INTERRUPT_MASK, - NAU8821_IRQ_EJECT_EN | NAU8821_IRQ_INSERT_EN, - NAU8821_IRQ_EJECT_EN); + NAU8821_IRQ_INSERT_EN, 0); + /* Bypass de-bounce circuit */ regmap_update_bits(regmap, NAU8821_R0D_JACK_DET_CTRL, NAU8821_JACK_DET_DB_BYPASS, NAU8821_JACK_DET_DB_BYPASS); @@ -1094,7 +1098,6 @@ static void nau8821_eject_jack(struct nau8821 *nau8821) NAU8821_IRQ_KEY_RELEASE_DIS | NAU8821_IRQ_KEY_PRESS_DIS); } - } static void nau8821_jdet_work(struct work_struct *work) @@ -1151,6 +1154,15 @@ static void nau8821_setup_inserted_irq(struct nau8821 *nau8821) { struct regmap *regmap = nau8821->regmap; + /* Disable & mask insertion IRQ */ + regmap_update_bits(regmap, NAU8821_R12_INTERRUPT_DIS_CTRL, + NAU8821_IRQ_INSERT_DIS, NAU8821_IRQ_INSERT_DIS); + regmap_update_bits(regmap, NAU8821_R0F_INTERRUPT_MASK, + NAU8821_IRQ_INSERT_EN, NAU8821_IRQ_INSERT_EN); + + /* Clear insert IRQ status */ + nau8821_irq_status_clear(regmap, NAU8821_JACK_INSERT_DETECTED); + /* Enable internal VCO needed for interruptions */ if (nau8821->dapm->bias_level < SND_SOC_BIAS_PREPARE) nau8821_configure_sysclk(nau8821, NAU8821_CLK_INTERNAL, 0); @@ -1169,17 +1181,18 @@ static void nau8821_setup_inserted_irq(struct nau8821 *nau8821) regmap_update_bits(regmap, NAU8821_R0D_JACK_DET_CTRL, NAU8821_JACK_DET_DB_BYPASS, 0); + /* Unmask & enable the ejection IRQs */ regmap_update_bits(regmap, NAU8821_R0F_INTERRUPT_MASK, - NAU8821_IRQ_EJECT_EN, 0); + NAU8821_IRQ_EJECT_EN, 0); regmap_update_bits(regmap, NAU8821_R12_INTERRUPT_DIS_CTRL, - NAU8821_IRQ_EJECT_DIS, 0); + NAU8821_IRQ_EJECT_DIS, 0); } static irqreturn_t nau8821_interrupt(int irq, void *data) { struct nau8821 *nau8821 = (struct nau8821 *)data; struct regmap *regmap = nau8821->regmap; - int active_irq, clear_irq = 0, event = 0, event_mask = 0; + int active_irq, event = 0, event_mask = 0; if (regmap_read(regmap, NAU8821_R10_IRQ_STATUS, &active_irq)) { dev_err(nau8821->dev, "failed to read irq status\n"); @@ -1195,14 +1208,13 @@ 
static irqreturn_t nau8821_interrupt(int irq, void *data) NAU8821_MICDET_MASK, NAU8821_MICDET_DIS); nau8821_eject_jack(nau8821); event_mask |= SND_JACK_HEADSET; - clear_irq = NAU8821_JACK_EJECT_IRQ_MASK; } else if (active_irq & NAU8821_KEY_SHORT_PRESS_IRQ) { event |= NAU8821_BUTTON; event_mask |= NAU8821_BUTTON; - clear_irq = NAU8821_KEY_SHORT_PRESS_IRQ; + nau8821_irq_status_clear(regmap, NAU8821_KEY_SHORT_PRESS_IRQ); } else if (active_irq & NAU8821_KEY_RELEASE_IRQ) { event_mask = NAU8821_BUTTON; - clear_irq = NAU8821_KEY_RELEASE_IRQ; + nau8821_irq_status_clear(regmap, NAU8821_KEY_RELEASE_IRQ); } else if ((active_irq & NAU8821_JACK_INSERT_IRQ_MASK) == NAU8821_JACK_INSERT_DETECTED) { cancel_work_sync(&nau8821->jdet_work); @@ -1212,27 +1224,17 @@ static irqreturn_t nau8821_interrupt(int irq, void *data) /* detect microphone and jack type */ schedule_work(&nau8821->jdet_work); /* Turn off insertion interruption at manual mode */ - regmap_update_bits(regmap, - NAU8821_R12_INTERRUPT_DIS_CTRL, - NAU8821_IRQ_INSERT_DIS, - NAU8821_IRQ_INSERT_DIS); - regmap_update_bits(regmap, - NAU8821_R0F_INTERRUPT_MASK, - NAU8821_IRQ_INSERT_EN, - NAU8821_IRQ_INSERT_EN); nau8821_setup_inserted_irq(nau8821); } else { dev_warn(nau8821->dev, "Inserted IRQ fired but not connected\n"); nau8821_eject_jack(nau8821); } + } else { + /* Clear the rightmost interrupt */ + nau8821_irq_status_clear(regmap, active_irq); } - if (!clear_irq) - clear_irq = active_irq; - /* clears the rightmost interruption */ - regmap_write(regmap, NAU8821_R11_INT_CLR_KEY_STATUS, clear_irq); - if (event_mask) snd_soc_jack_report(nau8821->jack, event, event_mask); -- cgit v1.2.3 From 2b4eda7bf7d8a4e2f7575a98f55d8336dec0f302 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 3 Oct 2025 21:03:26 +0300 Subject: ASoC: nau8821: Add DMI quirk to bypass jack debounce circuit Stress testing the audio jack hotplug handling on a few Steam Deck units revealed that the debounce circuit is responsible for having a negative impact on the detection reliability, e.g. in some cases the ejection interrupt is not fired, while in other instances it goes into a kind of invalid state and generates a flood of misleading interrupts. Add new entries to the DMI table introduced via commit 1bc40efdaf4a ("ASoC: nau8821: Add DMI quirk mechanism for active-high jack-detect") and extend the quirk logic to allow bypassing the debounce circuit used for jack detection on Valve Steam Deck LCD and OLED models. While at it, rename existing NAU8821_JD_ACTIVE_HIGH quirk bitfield to NAU8821_QUIRK_JD_ACTIVE_HIGH. This should help improve code readability by differentiating from similarly named register bits. 
Fixes: aab1ad11d69f ("ASoC: nau8821: new driver") Signed-off-by: Cristian Ciocaltea Link: https://patch.msgid.link/20251003-nau8821-jdet-fixes-v1-4-f7b0e2543f09@collabora.com Signed-off-by: Mark Brown --- sound/soc/codecs/nau8821.c | 33 +++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/nau8821.c b/sound/soc/codecs/nau8821.c index 56e5a0d80782..a8ff2ce70be9 100644 --- a/sound/soc/codecs/nau8821.c +++ b/sound/soc/codecs/nau8821.c @@ -26,7 +26,8 @@ #include #include "nau8821.h" -#define NAU8821_JD_ACTIVE_HIGH BIT(0) +#define NAU8821_QUIRK_JD_ACTIVE_HIGH BIT(0) +#define NAU8821_QUIRK_JD_DB_BYPASS BIT(1) static int nau8821_quirk; static int quirk_override = -1; @@ -1177,9 +1178,10 @@ static void nau8821_setup_inserted_irq(struct nau8821 *nau8821) regmap_update_bits(regmap, NAU8821_R1D_I2S_PCM_CTRL2, NAU8821_I2S_MS_MASK, NAU8821_I2S_MS_SLAVE); - /* Not bypass de-bounce circuit */ - regmap_update_bits(regmap, NAU8821_R0D_JACK_DET_CTRL, - NAU8821_JACK_DET_DB_BYPASS, 0); + /* Do not bypass de-bounce circuit */ + if (!(nau8821_quirk & NAU8821_QUIRK_JD_DB_BYPASS)) + regmap_update_bits(regmap, NAU8821_R0D_JACK_DET_CTRL, + NAU8821_JACK_DET_DB_BYPASS, 0); /* Unmask & enable the ejection IRQs */ regmap_update_bits(regmap, NAU8821_R0F_INTERRUPT_MASK, @@ -1864,7 +1866,23 @@ static const struct dmi_system_id nau8821_quirk_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"), DMI_MATCH(DMI_BOARD_NAME, "CW14Q01P-V2"), }, - .driver_data = (void *)(NAU8821_JD_ACTIVE_HIGH), + .driver_data = (void *)(NAU8821_QUIRK_JD_ACTIVE_HIGH), + }, + { + /* Valve Steam Deck LCD */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Valve"), + DMI_MATCH(DMI_PRODUCT_NAME, "Jupiter"), + }, + .driver_data = (void *)(NAU8821_QUIRK_JD_DB_BYPASS), + }, + { + /* Valve Steam Deck OLED */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Valve"), + DMI_MATCH(DMI_PRODUCT_NAME, "Galileo"), + }, + .driver_data = (void *)(NAU8821_QUIRK_JD_DB_BYPASS), }, {} }; @@ -1906,9 +1924,12 @@ static int nau8821_i2c_probe(struct i2c_client *i2c) nau8821_check_quirks(); - if (nau8821_quirk & NAU8821_JD_ACTIVE_HIGH) + if (nau8821_quirk & NAU8821_QUIRK_JD_ACTIVE_HIGH) nau8821->jkdet_polarity = 0; + if (nau8821_quirk & NAU8821_QUIRK_JD_DB_BYPASS) + dev_dbg(dev, "Force bypassing jack detection debounce circuit\n"); + nau8821_print_device_properties(nau8821); nau8821_reset_chip(nau8821->regmap); -- cgit v1.2.3 From ee70bacef1c6050e4836409927294d744dbcfa72 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 3 Oct 2025 21:03:27 +0300 Subject: ASoC: nau8821: Avoid unnecessary blocking in IRQ handler The interrupt handler offloads the microphone detection logic to nau8821_jdet_work(), which implies a sleep operation. However, before being able to process any subsequent hotplug event, the interrupt handler needs to wait for any prior scheduled work to complete. Move the sleep out of jdet_work by converting it to a delayed work. This eliminates the undesired blocking in the interrupt handler when attempting to cancel a recently scheduled work item and should help reduce transient input reports that might confuse user-space.
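The effect of the conversion in a nutshell (sketch only; the two calls below belong to the before and after versions of the driver respectively and cannot coexist):

/* before: the worker itself slept 20 ms, so a cancel from the IRQ
 * handler could block for that long */
schedule_work(&nau8821->jdet_work);
/* after: the settle time becomes the scheduling delay and cancelling
 * a not-yet-started work item returns promptly */
schedule_delayed_work(&nau8821->jdet_work, msecs_to_jiffies(20));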
Signed-off-by: Cristian Ciocaltea Link: https://patch.msgid.link/20251003-nau8821-jdet-fixes-v1-5-f7b0e2543f09@collabora.com Signed-off-by: Mark Brown --- sound/soc/codecs/nau8821.c | 22 ++++++++++++---------- sound/soc/codecs/nau8821.h | 2 +- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/sound/soc/codecs/nau8821.c b/sound/soc/codecs/nau8821.c index a8ff2ce70be9..4fa9a785513e 100644 --- a/sound/soc/codecs/nau8821.c +++ b/sound/soc/codecs/nau8821.c @@ -1104,16 +1104,12 @@ static void nau8821_eject_jack(struct nau8821 *nau8821) static void nau8821_jdet_work(struct work_struct *work) { struct nau8821 *nau8821 = - container_of(work, struct nau8821, jdet_work); + container_of(work, struct nau8821, jdet_work.work); struct snd_soc_dapm_context *dapm = nau8821->dapm; struct snd_soc_component *component = snd_soc_dapm_to_component(dapm); struct regmap *regmap = nau8821->regmap; int jack_status_reg, mic_detected, event = 0, event_mask = 0; - snd_soc_component_force_enable_pin(component, "MICBIAS"); - snd_soc_dapm_sync(dapm); - msleep(20); - regmap_read(regmap, NAU8821_R58_I2C_DEVICE_ID, &jack_status_reg); mic_detected = !(jack_status_reg & NAU8821_KEYDET); if (mic_detected) { @@ -1146,6 +1142,7 @@ static void nau8821_jdet_work(struct work_struct *work) snd_soc_component_disable_pin(component, "MICBIAS"); snd_soc_dapm_sync(dapm); } + event_mask |= SND_JACK_HEADSET; snd_soc_jack_report(nau8821->jack, event, event_mask); } @@ -1194,6 +1191,7 @@ static irqreturn_t nau8821_interrupt(int irq, void *data) { struct nau8821 *nau8821 = (struct nau8821 *)data; struct regmap *regmap = nau8821->regmap; + struct snd_soc_component *component; int active_irq, event = 0, event_mask = 0; if (regmap_read(regmap, NAU8821_R10_IRQ_STATUS, &active_irq)) { @@ -1205,7 +1203,7 @@ static irqreturn_t nau8821_interrupt(int irq, void *data) if ((active_irq & NAU8821_JACK_EJECT_IRQ_MASK) == NAU8821_JACK_EJECT_DETECTED) { - cancel_work_sync(&nau8821->jdet_work); + cancel_delayed_work_sync(&nau8821->jdet_work); regmap_update_bits(regmap, NAU8821_R71_ANALOG_ADC_1, NAU8821_MICDET_MASK, NAU8821_MICDET_DIS); nau8821_eject_jack(nau8821); @@ -1219,12 +1217,15 @@ static irqreturn_t nau8821_interrupt(int irq, void *data) nau8821_irq_status_clear(regmap, NAU8821_KEY_RELEASE_IRQ); } else if ((active_irq & NAU8821_JACK_INSERT_IRQ_MASK) == NAU8821_JACK_INSERT_DETECTED) { - cancel_work_sync(&nau8821->jdet_work); + cancel_delayed_work_sync(&nau8821->jdet_work); regmap_update_bits(regmap, NAU8821_R71_ANALOG_ADC_1, NAU8821_MICDET_MASK, NAU8821_MICDET_EN); if (nau8821_is_jack_inserted(regmap)) { - /* detect microphone and jack type */ - schedule_work(&nau8821->jdet_work); + /* Detect microphone and jack type */ + component = snd_soc_dapm_to_component(nau8821->dapm); + snd_soc_component_force_enable_pin(component, "MICBIAS"); + snd_soc_dapm_sync(nau8821->dapm); + schedule_delayed_work(&nau8821->jdet_work, msecs_to_jiffies(20)); /* Turn off insertion interruption at manual mode */ nau8821_setup_inserted_irq(nau8821); } else { @@ -1661,7 +1662,8 @@ int nau8821_enable_jack_detect(struct snd_soc_component *component, nau8821->jack = jack; /* Initiate jack detection work queue */ - INIT_WORK(&nau8821->jdet_work, nau8821_jdet_work); + INIT_DELAYED_WORK(&nau8821->jdet_work, nau8821_jdet_work); + ret = devm_request_threaded_irq(nau8821->dev, nau8821->irq, NULL, nau8821_interrupt, IRQF_TRIGGER_LOW | IRQF_ONESHOT, "nau8821", nau8821); diff --git a/sound/soc/codecs/nau8821.h b/sound/soc/codecs/nau8821.h index f0935ffafcbe..88602923780d 100644 
--- a/sound/soc/codecs/nau8821.h +++ b/sound/soc/codecs/nau8821.h @@ -561,7 +561,7 @@ struct nau8821 { struct regmap *regmap; struct snd_soc_dapm_context *dapm; struct snd_soc_jack *jack; - struct work_struct jdet_work; + struct delayed_work jdet_work; int irq; int clk_id; int micbias_voltage; -- cgit v1.2.3 From ed25dcfbc4327570b28f0328a8e17d121434c0ea Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 26 Sep 2025 12:41:07 -0700 Subject: KVM: arm64: nv: Don't treat ZCR_EL2 as a 'mapped' register Unlike the other mapped EL2 sysregs ZCR_EL2 isn't guaranteed to be resident when a vCPU is loaded as it actually follows the SVE context. As such, the contents of ZCR_EL1 may belong to another guest if the vCPU has been preempted before reaching sysreg emulation. Unconditionally use the in-memory value of ZCR_EL2 and switch to the memory-only accessors. The in-memory value is guaranteed to be valid as fpsimd_lazy_switch_to_{guest,host}() will restore/save the register appropriately. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 91053aa832d0..4a75e5f0c259 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -203,7 +203,6 @@ static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg, MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL ); MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL ); MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); - MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL ); MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL ); MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL ); case CNTHCTL_EL2: @@ -2709,14 +2708,13 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu, } if (!p->is_write) { - p->regval = vcpu_read_sys_reg(vcpu, ZCR_EL2); + p->regval = __vcpu_sys_reg(vcpu, ZCR_EL2); return true; } vq = SYS_FIELD_GET(ZCR_ELx, LEN, p->regval) + 1; vq = min(vq, vcpu_sve_max_vq(vcpu)); - vcpu_write_sys_reg(vcpu, vq - 1, ZCR_EL2); - + __vcpu_assign_sys_reg(vcpu, ZCR_EL2, vq - 1); return true; } -- cgit v1.2.3 From 9a1950f97741a23fc68a7b2cfd487e059d389be5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 26 Sep 2025 12:41:08 -0700 Subject: KVM: arm64: nv: Don't advance PC when pending an SVE exception Jan reports that running a nested guest on Neoverse-V2 leads to a WARN in the host due to simultaneously pending an exception and PC increment after an access to ZCR_EL2. Returning true from a sysreg accessor is an indication that the sysreg instruction has been retired. Of course this isn't the case when we've pended a synchronous SVE exception for the guest. Fix the return value and let the exception propagate to the guest as usual. 
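The accessor contract being fixed, paraphrased as a sketch (not the literal upstream code; names taken from the diff below):

static bool access_zcr_el2_sketch(struct kvm_vcpu *vcpu,
				  struct sys_reg_params *p,
				  const struct sys_reg_desc *r)
{
	if (guest_hyp_sve_traps_enabled(vcpu)) {
		kvm_inject_nested_sve_trap(vcpu);
		return false;	/* exception pended: PC must not advance */
	}
	/* ... emulate the register access ... */
	return true;		/* instruction retired: PC advances */
}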
Reported-by: Jan Kotas Closes: https://lore.kernel.org/kvmarm/865xd61tt5.wl-maz@kernel.org/ Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4a75e5f0c259..ee8a7033c85b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2704,7 +2704,7 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu, if (guest_hyp_sve_traps_enabled(vcpu)) { kvm_inject_nested_sve_trap(vcpu); - return true; + return false; } if (!p->is_write) { -- cgit v1.2.3 From a46c09b382eea3f9e3d16576096b987a2171fcca Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 26 Sep 2025 15:42:46 -0700 Subject: KVM: arm64: Use the in-context stage-1 in __kvm_find_s1_desc_level() Running the external_aborts selftest at EL2 leads to an ugly splat due to the stage-1 MMU being disabled for the walked context, owing to the fact that __kvm_find_s1_desc_level() is hardcoded to the EL1&0 regime. Select the appropriate translation regime for the stage-1 walk based on the current vCPU context. Fixes: b8e625167a32 ("KVM: arm64: Add S1 IPA to page table level walker") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 20bb9af125b1..e2e06ec8a67b 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1602,13 +1602,17 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level) .fn = match_s1_desc, .priv = &dm, }, - .regime = TR_EL10, .as_el0 = false, .pan = false, }; struct s1_walk_result wr = {}; int ret; + if (is_hyp_ctxt(vcpu)) + wi.regime = vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2; + else + wi.regime = TR_EL10; + ret = setup_s1_walk(vcpu, &wi, &wr, va); if (ret) return ret; -- cgit v1.2.3 From 890c608b4d5e6a616693da92a2d4e7de4ab9e6c5 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 26 Sep 2025 15:44:54 -0700 Subject: KVM: arm64: selftests: Test effective value of HCR_EL2.AMO A defect against the architecture now allows an implementation to treat AMO as 1 when HCR_EL2.{E2H, TGE} = {1, 0}. KVM now takes advantage of this interpretation to address a quality of emulation issue w.r.t. SError injection. Add a corresponding test case and expect a pending SError to be taken. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- .../testing/selftests/kvm/arm64/external_aborts.c | 43 ++++++++++++++++++++++ .../selftests/kvm/include/arm64/processor.h | 12 +++++- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c index 592b26ded779..d8fe17a6cc59 100644 --- a/tools/testing/selftests/kvm/arm64/external_aborts.c +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -359,6 +359,44 @@ static void test_mmio_ease(void) kvm_vm_free(vm); } +static void test_serror_amo_guest(void) +{ + /* + * The ISB is entirely unnecessary (and highlights how FEAT_NV2 is borked) + * since the write is redirected to memory. But don't write (intentionally) + * broken code! + */ + sysreg_clear_set(hcr_el2, HCR_EL2_AMO | HCR_EL2_TGE, 0); + isb(); + + GUEST_SYNC(0); + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + /* + * KVM treats the effective value of AMO as 1 when + * HCR_EL2.{E2H,TGE} = {1, 0}, meaning the SError will be taken when + * unmasked. 
+ */ + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror_amo(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_amo_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + int main(void) { test_mmio_abort(); @@ -369,4 +407,9 @@ int main(void) test_serror_emulated(); test_mmio_ease(); test_s1ptw_abort(); + + if (!test_supports_el2()) + return 0; + + test_serror_amo(); } diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 6f481475c135..ff928716574d 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -305,7 +305,17 @@ void test_wants_mte(void); void test_disable_default_vgic(void); bool vm_supports_el2(struct kvm_vm *vm); -static bool vcpu_has_el2(struct kvm_vcpu *vcpu) + +static inline bool test_supports_el2(void) +{ + struct kvm_vm *vm = vm_create(1); + bool supported = vm_supports_el2(vm); + + kvm_vm_free(vm); + return supported; +} + +static inline bool vcpu_has_el2(struct kvm_vcpu *vcpu) { return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); } -- cgit v1.2.3 From cb49b7b8622e914171e9eb7197c006320889e0fc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 26 Sep 2025 08:58:38 -0700 Subject: KVM: arm64: selftests: Track width of timer counter as "int", not "uint64_t" Store the width of arm64's timer counter as an "int", not a "uint64_t". ilog2() returns an "int", and more importantly using what is an "unsigned long" under the hood makes clang unhappy due to a type mismatch when clamping the width to a sane value. 
arm64/arch_timer_edge_cases.c:1032:10: error: comparison of distinct pointer types ('typeof (width) *' (aka 'unsigned long *') and 'typeof (56) *' (aka 'int *')) [-Werror,-Wcompare-distinct-pointer-types] 1032 | width = clamp(width, 56, 64); | ^~~~~~~~~~~~~~~~~~~~ tools/include/linux/kernel.h:47:45: note: expanded from macro 'clamp' 47 | #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) | ^~~~~~~~~~~~ tools/include/linux/kernel.h:33:17: note: expanded from macro 'max' 33 | (void) (&_max1 == &_max2); \ | ~~~~~~ ^ ~~~~~~ tools/include/linux/kernel.h:39:9: note: expanded from macro 'min' 39 | typeof(x) _min1 = (x); \ | ^ Fixes: fad4cf944839 ("KVM: arm64: selftests: Determine effective counter width in arch_timer_edge_cases") Cc: Sebastian Ott Signed-off-by: Sean Christopherson Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index 91906414a474..993c9e38e729 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -1020,7 +1020,7 @@ static void set_counter_defaults(void) { const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600; uint64_t freq = read_sysreg(CNTFRQ_EL0); - uint64_t width = ilog2(MIN_ROLLOVER_SECS * freq); + int width = ilog2(MIN_ROLLOVER_SECS * freq); width = clamp(width, 56, 64); CVAL_MAX = GENMASK_ULL(width - 1, 0); -- cgit v1.2.3 From 0aa1b76fe1429629215a7c79820e4b96233ac4a3 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 30 Sep 2025 01:52:37 -0700 Subject: KVM: arm64: Prevent access to vCPU events before init Another day, another syzkaller bug. KVM erroneously allows userspace to pend vCPU events for a vCPU that hasn't been initialized yet, leading to KVM interpreting a bunch of uninitialized garbage for routing / injecting the exception. In one case the injection code and the hyp disagree on whether the vCPU has a 32bit EL1 and put the vCPU into an illegal mode for AArch64, tripping the BUG() in exception_target_el() during the next injection: kernel BUG at arch/arm64/kvm/inject_fault.c:40! 
Internal error: Oops - BUG: 00000000f2000800 [#1] SMP CPU: 3 UID: 0 PID: 318 Comm: repro Not tainted 6.17.0-rc4-00104-g10fd0285305d #6 PREEMPT Hardware name: linux,dummy-virt (DT) pstate: 21402009 (nzCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) pc : exception_target_el+0x88/0x8c lr : pend_serror_exception+0x18/0x13c sp : ffff800082f03a10 x29: ffff800082f03a10 x28: ffff0000cb132280 x27: 0000000000000000 x26: 0000000000000000 x25: ffff0000c2a99c20 x24: 0000000000000000 x23: 0000000000008000 x22: 0000000000000002 x21: 0000000000000004 x20: 0000000000008000 x19: ffff0000c2a99c20 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 00000000200000c0 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : ffff800082f03af8 x7 : 0000000000000000 x6 : 0000000000000000 x5 : ffff800080f621f0 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 000000000040009b x1 : 0000000000000003 x0 : ffff0000c2a99c20 Call trace: exception_target_el+0x88/0x8c (P) kvm_inject_serror_esr+0x40/0x3b4 __kvm_arm_vcpu_set_events+0xf0/0x100 kvm_arch_vcpu_ioctl+0x180/0x9d4 kvm_vcpu_ioctl+0x60c/0x9f4 __arm64_sys_ioctl+0xac/0x104 invoke_syscall+0x48/0x110 el0_svc_common.constprop.0+0x40/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x34/0xf0 el0t_64_sync_handler+0xa0/0xe4 el0t_64_sync+0x198/0x19c Code: f946bc01 b4fffe61 9101e020 17fffff2 (d4210000) Reject the ioctls outright as no sane VMM would call these before KVM_ARM_VCPU_INIT anyway. Even if it did the exception would've been thrown away by the eventual reset of the vCPU's state. Cc: stable@vger.kernel.org # 6.17 Fixes: b7b27facc7b5 ("arm/arm64: KVM: Add KVM_GET/SET_VCPU_EVENTS") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f21d1b7f20f8..f01cacb669cf 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1794,6 +1794,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_GET_VCPU_EVENTS: { struct kvm_vcpu_events events; + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + if (kvm_arm_vcpu_get_events(vcpu, &events)) return -EINVAL; @@ -1805,6 +1808,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_SET_VCPU_EVENTS: { struct kvm_vcpu_events events; + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + if (copy_from_user(&events, argp, sizeof(events))) return -EFAULT; -- cgit v1.2.3 From cc4309324dc695f62d25d56c0b29805e9724170c Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 30 Sep 2025 16:36:20 -0700 Subject: KVM: arm64: Document vCPU event ioctls as requiring init'ed vCPU KVM rejects calls to KVM_{GET,SET}_VCPU_EVENTS for an uninitialized vCPU as of commit cc96679f3c03 ("KVM: arm64: Prevent access to vCPU events before init"). Update the corresponding API documentation. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- Documentation/virt/kvm/api.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 6ae24c5ca559..4973c74db5c6 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1229,6 +1229,9 @@ It is not possible to read back a pending external abort (injected via KVM_SET_VCPU_EVENTS or otherwise) because such an exception is always delivered directly to the virtual CPU). +Calling this ioctl on a vCPU that hasn't been initialized will return +-ENOEXEC. 
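A minimal user-space sketch of the required calling order (error handling trimmed; vm_fd and vcpu_fd are assumed to be already-created KVM file descriptors):

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int get_events_after_init(int vm_fd, int vcpu_fd)
{
	struct kvm_vcpu_init init;
	struct kvm_vcpu_events events;

	/* without the init step, the events ioctls now fail with -ENOEXEC */
	if (ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init))
		return -1;
	if (ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init))
		return -1;

	return ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events);
}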
+ :: struct kvm_vcpu_events { @@ -1309,6 +1312,8 @@ exceptions by manipulating individual registers using the KVM_SET_ONE_REG API. See KVM_GET_VCPU_EVENTS for the data structure. +Calling this ioctl on a vCPU that hasn't been initialized will return +-ENOEXEC. 4.33 KVM_GET_DEBUGREGS ---------------------- -- cgit v1.2.3 From a133052666bed0dc0b169952e9d3f9e6b2125f9a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 30 Sep 2025 12:33:02 -0700 Subject: KVM: selftests: Fix irqfd_test for non-x86 architectures The KVM_IRQFD ioctl fails if no irqchip is present in-kernel, which isn't too surprising as there's not much KVM can do for an IRQ if it cannot resolve a destination. As written the irqfd_test assumes that a 'default' VM created in selftests has an in-kernel irqchip created implicitly. That may be the case on x86 but it isn't necessarily true on other architectures. Add an arch predicate indicating if 'default' VMs get an irqchip and make the irqfd_test depend on it. Work around arm64 VGIC initialization requirements by using vm_create_with_one_vcpu(), ignoring the created vCPU as it isn't used for the test. Reported-by: Sebastian Ott Reported-by: Naresh Kamboju Acked-by: Sean Christopherson Fixes: 7e9b231c402a ("KVM: selftests: Add a KVM_IRQFD test to verify uniqueness requirements") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/include/kvm_util.h | 2 ++ tools/testing/selftests/kvm/irqfd_test.c | 14 +++++++++++--- tools/testing/selftests/kvm/lib/arm64/processor.c | 5 +++++ tools/testing/selftests/kvm/lib/kvm_util.c | 5 +++++ tools/testing/selftests/kvm/lib/s390/processor.c | 5 +++++ tools/testing/selftests/kvm/lib/x86/processor.c | 5 +++++ 6 files changed, 33 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 26cc30290e76..112d3f443a17 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -1273,4 +1273,6 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); uint32_t guest_get_vcpuid(void); +bool kvm_arch_has_default_irqchip(void); + #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c index 7c301b4c7005..5d7590d01868 100644 --- a/tools/testing/selftests/kvm/irqfd_test.c +++ b/tools/testing/selftests/kvm/irqfd_test.c @@ -89,11 +89,19 @@ static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd) int main(int argc, char *argv[]) { pthread_t racing_thread; + struct kvm_vcpu *unused; int r, i; - /* Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. */ - vm1 = vm_create(1); - vm2 = vm_create(1); + TEST_REQUIRE(kvm_arch_has_default_irqchip()); + + /* + * Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. Also + * create an unused vCPU as certain architectures (like arm64) need to + * complete IRQ chip initialization after all possible vCPUs for a VM + * have been created. 
+ */ + vm1 = vm_create_with_one_vcpu(&unused, NULL); + vm2 = vm_create_with_one_vcpu(&unused, NULL); WRITE_ONCE(__eventfd, kvm_new_eventfd()); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 369a4c87dd8f..54f6d17c78f7 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -725,3 +725,8 @@ void kvm_arch_vm_release(struct kvm_vm *vm) if (vm->arch.has_gic) close(vm->arch.gic_fd); } + +bool kvm_arch_has_default_irqchip(void) +{ + return request_vgic && kvm_supports_vgic_v3(); +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6743fbd9bd67..a35adfebfa23 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -2344,3 +2344,8 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr) pg = paddr >> vm->page_shift; return sparsebit_is_set(region->protected_phy_pages, pg); } + +__weak bool kvm_arch_has_default_irqchip(void) +{ + return false; +} diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 20cfe970e3e3..8ceeb17c819a 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -221,3 +221,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { } + +bool kvm_arch_has_default_irqchip(void) +{ + return true; +} diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index c748cd9b2eef..b418502c5ecc 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -1318,3 +1318,8 @@ bool sys_clocksource_is_based_on_tsc(void) return ret; } + +bool kvm_arch_has_default_irqchip(void) +{ + return true; +} -- cgit v1.2.3 From 05a02490faeb952f1c8d2f5c38346fa0a717a483 Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Tue, 30 Sep 2025 16:56:21 +0300 Subject: KVM: arm64: Remove unreachable break after return Remove an unnecessary 'break' statement that follows a 'return' in arch/arm64/kvm/at.c. The break is unreachable. Signed-off-by: Osama Abdelkader Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index e2e06ec8a67b..be26d5aa668c 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -91,7 +91,6 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o case OP_AT_S1E2W: case OP_AT_S1E2A: return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2; - break; default: return (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10; -- cgit v1.2.3 From 9a7f87eb587da49993f47f44c4c5535d8de76750 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sun, 12 Oct 2025 23:43:52 +0800 Subject: KVM: arm64: selftests: Sync ID_AA64PFR1, MPIDR, CLIDR in guest We forgot to sync several registers (ID_AA64PFR1, MPIDR, CLIDR) in the guest to make sure that the guest has seen the written value. Add them to the list.
Signed-off-by: Zenghui Yu Reviewed-By: Ben Horgan Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/set_id_regs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 8ff1e853f7f8..5e24f77868b5 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -249,11 +249,14 @@ static void guest_code(void) GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1); GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64PFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); + GUEST_REG_SYNC(SYS_MPIDR_EL1); + GUEST_REG_SYNC(SYS_CLIDR_EL1); GUEST_REG_SYNC(SYS_CTR_EL0); GUEST_REG_SYNC(SYS_MIDR_EL1); GUEST_REG_SYNC(SYS_REVIDR_EL1); -- cgit v1.2.3 From b9ce79887e270ed64ed499aa69f903cdca401c2f Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 10 Oct 2025 21:04:16 +0200 Subject: smb: client: Return a status code only as a constant in sid_to_id() Return a status code without storing it in an intermediate variable. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Steve French --- fs/smb/client/cifsacl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 63b3b1290bed..ce2ebc213a1d 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -339,7 +339,6 @@ int sid_to_id(struct cifs_sb_info *cifs_sb, struct smb_sid *psid, struct cifs_fattr *fattr, uint sidtype) { - int rc = 0; struct key *sidkey; char *sidstr; const struct cred *saved_cred; @@ -446,12 +445,12 @@ out_revert_creds: * fails then we just fall back to using the ctx->linux_uid/linux_gid. */ got_valid_id: - rc = 0; if (sidtype == SIDOWNER) fattr->cf_uid = fuid; else fattr->cf_gid = fgid; - return rc; + + return 0; } int -- cgit v1.2.3 From 911063b590ce77d473e92716f05f34712f97ef95 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 10 Oct 2025 14:48:13 +0200 Subject: smb: client: Omit one redundant variable assignment in cifs_xattr_set() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The local variable “rc” is assigned a value in an if branch without using it before it is reassigned there. Thus delete this assignment statement. Signed-off-by: Markus Elfring Signed-off-by: Steve French --- fs/smb/client/xattr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c index b88fa04f5792..029910d56c22 100644 --- a/fs/smb/client/xattr.c +++ b/fs/smb/client/xattr.c @@ -178,7 +178,6 @@ static int cifs_xattr_set(const struct xattr_handler *handler, memcpy(pacl, value, size); if (pTcon->ses->server->ops->set_acl) { int aclflags = 0; - rc = 0; switch (handler->flags) { case XATTR_CIFS_NTSD_FULL: -- cgit v1.2.3 From e487f13cc94fa80c71f217d7b176e48fbb5d6c46 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:21 +0200 Subject: smb: smbdirect: introduce smbdirect_mr_io.{kref,mutex} and SMBDIRECT_MR_DISABLED This will be used in the next commits in order to improve the client code. A broken connection can just disable the smbdirect_mr_io while keeping the memory around for the caller.
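A hypothetical release helper illustrating how the new kref is meant to be used (kernel context assumed; the actual consumers arrive in the follow-up patches of this series):

static void smbdirect_mr_io_free(struct kref *kref)
{
	struct smbdirect_mr_io *mr =
		container_of(kref, struct smbdirect_mr_io, kref);

	mutex_destroy(&mr->mutex);
	kfree(mr);
}

/* connection teardown drops its reference; a still-active registration
 * keeps the second one, so the memory stays around for the caller */
kref_put(&mr->kref, smbdirect_mr_io_free);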
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index db22a1d0546b..361db7f9f623 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -437,13 +437,22 @@ enum smbdirect_mr_state { SMBDIRECT_MR_READY, SMBDIRECT_MR_REGISTERED, SMBDIRECT_MR_INVALIDATED, - SMBDIRECT_MR_ERROR + SMBDIRECT_MR_ERROR, + SMBDIRECT_MR_DISABLED }; struct smbdirect_mr_io { struct smbdirect_socket *socket; struct ib_cqe cqe; + /* + * We can have up to two references: + * 1. by the connection + * 2. by the registration + */ + struct kref kref; + struct mutex mutex; + struct list_head list; enum smbdirect_mr_state state; -- cgit v1.2.3 From abe5b71c391352127925bf951f3169d205c5caa7 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:22 +0200 Subject: smb: client: change smbd_deregister_mr() to return void No callers checks the return value and this makes further changes easier. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 4 +--- fs/smb/client/smbdirect.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 316f398c70f4..a20aa2ddf57d 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2612,7 +2612,7 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) * and we have to locally invalidate the buffer to prevent data is being * modified by remote peer after upper layer consumes it */ -int smbd_deregister_mr(struct smbdirect_mr_io *smbdirect_mr) +void smbd_deregister_mr(struct smbdirect_mr_io *smbdirect_mr) { struct ib_send_wr *wr; struct smbdirect_socket *sc = smbdirect_mr->socket; @@ -2662,8 +2662,6 @@ int smbd_deregister_mr(struct smbdirect_mr_io *smbdirect_mr) done: if (atomic_dec_and_test(&sc->mr_io.used.count)) wake_up(&sc->mr_io.cleanup.wait_queue); - - return rc; } static bool smb_set_sge(struct smb_extract_to_rdma *rdma, diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index d67ac5ddaff4..577d37dbeb8a 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -60,7 +60,7 @@ int smbd_send(struct TCP_Server_Info *server, struct smbdirect_mr_io *smbd_register_mr( struct smbd_connection *info, struct iov_iter *iter, bool writing, bool need_invalidate); -int smbd_deregister_mr(struct smbdirect_mr_io *mr); +void smbd_deregister_mr(struct smbdirect_mr_io *mr); #else #define cifs_rdma_enabled(server) 0 -- cgit v1.2.3 From 19421ec198981f60373080af67e67b6a6fcf191e Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:23 +0200 Subject: smb: client: let destroy_mr_list() call list_del(&mr->list) This makes the code clearer and will make further changes easier. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index a20aa2ddf57d..b7be67dacd09 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2363,6 +2363,7 @@ static void destroy_mr_list(struct smbdirect_socket *sc) mr->sgt.nents, mr->dir); ib_dereg_mr(mr->mr); kfree(mr->sgt.sgl); + list_del(&mr->list); kfree(mr); } } -- cgit v1.2.3 From a8e128b293e2b08d4ecca7c63ed1cb5b97f30af9 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:24 +0200 Subject: smb: client: let destroy_mr_list() remove locked from the list This should make sure get_mr() can't see the removed entries. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index b7be67dacd09..b974ca4e0b2e 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2355,9 +2355,16 @@ static void smbd_mr_recovery_work(struct work_struct *work) static void destroy_mr_list(struct smbdirect_socket *sc) { struct smbdirect_mr_io *mr, *tmp; + LIST_HEAD(all_list); + unsigned long flags; disable_work_sync(&sc->mr_io.recovery_work); - list_for_each_entry_safe(mr, tmp, &sc->mr_io.all.list, list) { + + spin_lock_irqsave(&sc->mr_io.all.lock, flags); + list_splice_tail_init(&sc->mr_io.all.list, &all_list); + spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); + + list_for_each_entry_safe(mr, tmp, &all_list, list) { if (mr->state == SMBDIRECT_MR_INVALIDATED) ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); -- cgit v1.2.3 From 9bebb8924b27f06e7072f0b18a5f78cef561c810 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:25 +0200 Subject: smb: client: improve logic in allocate_mr_list() - use 'mr' as variable name - use goto labels for easier cleanup - use destroy_mr_list() - style fixes - INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work) on success This will make further changes easier.
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 65 +++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index b974ca4e0b2e..658ca11cb26c 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2385,10 +2385,9 @@ static void destroy_mr_list(struct smbdirect_socket *sc) static int allocate_mr_list(struct smbdirect_socket *sc) { struct smbdirect_socket_parameters *sp = &sc->parameters; - int i; - struct smbdirect_mr_io *smbdirect_mr, *tmp; - - INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work); + struct smbdirect_mr_io *mr; + int ret; + u32 i; if (sp->responder_resources == 0) { log_rdma_mr(ERR, "responder_resources negotiated as 0\n"); @@ -2397,42 +2396,48 @@ static int allocate_mr_list(struct smbdirect_socket *sc) /* Allocate more MRs (2x) than hardware responder_resources */ for (i = 0; i < sp->responder_resources * 2; i++) { - smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL); - if (!smbdirect_mr) - goto cleanup_entries; - smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, sc->mr_io.type, - sp->max_frmr_depth); - if (IS_ERR(smbdirect_mr->mr)) { + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + ret = -ENOMEM; + goto kzalloc_mr_failed; + } + + mr->mr = ib_alloc_mr(sc->ib.pd, + sc->mr_io.type, + sp->max_frmr_depth); + if (IS_ERR(mr->mr)) { + ret = PTR_ERR(mr->mr); log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", sc->mr_io.type, sp->max_frmr_depth); - goto out; + goto ib_alloc_mr_failed; } - smbdirect_mr->sgt.sgl = kcalloc(sp->max_frmr_depth, - sizeof(struct scatterlist), - GFP_KERNEL); - if (!smbdirect_mr->sgt.sgl) { + + mr->sgt.sgl = kcalloc(sp->max_frmr_depth, + sizeof(struct scatterlist), + GFP_KERNEL); + if (!mr->sgt.sgl) { + ret = -ENOMEM; log_rdma_mr(ERR, "failed to allocate sgl\n"); - ib_dereg_mr(smbdirect_mr->mr); - goto out; + goto kcalloc_sgl_failed; } - smbdirect_mr->state = SMBDIRECT_MR_READY; - smbdirect_mr->socket = sc; + mr->state = SMBDIRECT_MR_READY; + mr->socket = sc; - list_add_tail(&smbdirect_mr->list, &sc->mr_io.all.list); + list_add_tail(&mr->list, &sc->mr_io.all.list); atomic_inc(&sc->mr_io.ready.count); } + + INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work); + return 0; -out: - kfree(smbdirect_mr); -cleanup_entries: - list_for_each_entry_safe(smbdirect_mr, tmp, &sc->mr_io.all.list, list) { - list_del(&smbdirect_mr->list); - ib_dereg_mr(smbdirect_mr->mr); - kfree(smbdirect_mr->sgt.sgl); - kfree(smbdirect_mr); - } - return -ENOMEM; +kcalloc_sgl_failed: + ib_dereg_mr(mr->mr); +ib_alloc_mr_failed: + kfree(mr); +kzalloc_mr_failed: + destroy_mr_list(sc); + return ret; } /* -- cgit v1.2.3 From c8478502960eb8fb9847b36a66380adf421cdc62 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:26 +0200 Subject: smb: client: improve logic in smbd_register_mr() - use 'mr' as variable name - style fixes This will make further changes easier. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 52 +++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 658ca11cb26c..a863b6fff87a 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2517,9 +2517,8 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, { struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_mr_io *smbdirect_mr; + struct smbdirect_mr_io *mr; int rc, num_pages; - enum dma_data_direction dir; struct ib_reg_wr *reg_wr; num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); @@ -2530,49 +2529,45 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, return NULL; } - smbdirect_mr = get_mr(sc); - if (!smbdirect_mr) { + mr = get_mr(sc); + if (!mr) { log_rdma_mr(ERR, "get_mr returning NULL\n"); return NULL; } - dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - smbdirect_mr->dir = dir; - smbdirect_mr->need_invalidate = need_invalidate; - smbdirect_mr->sgt.nents = 0; - smbdirect_mr->sgt.orig_nents = 0; + mr->dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + mr->need_invalidate = need_invalidate; + mr->sgt.nents = 0; + mr->sgt.orig_nents = 0; log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n", num_pages, iov_iter_count(iter), sp->max_frmr_depth); - smbd_iter_to_mr(iter, &smbdirect_mr->sgt, sp->max_frmr_depth); + smbd_iter_to_mr(iter, &mr->sgt, sp->max_frmr_depth); - rc = ib_dma_map_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, - smbdirect_mr->sgt.nents, dir); + rc = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); if (!rc) { log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", - num_pages, dir, rc); + num_pages, mr->dir, rc); goto dma_map_error; } - rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgt.sgl, - smbdirect_mr->sgt.nents, NULL, PAGE_SIZE); - if (rc != smbdirect_mr->sgt.nents) { + rc = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE); + if (rc != mr->sgt.nents) { log_rdma_mr(ERR, - "ib_map_mr_sg failed rc = %d nents = %x\n", - rc, smbdirect_mr->sgt.nents); + "ib_map_mr_sg failed rc = %d nents = %x\n", + rc, mr->sgt.nents); goto map_mr_error; } - ib_update_fast_reg_key(smbdirect_mr->mr, - ib_inc_rkey(smbdirect_mr->mr->rkey)); - reg_wr = &smbdirect_mr->wr; + ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey)); + reg_wr = &mr->wr; reg_wr->wr.opcode = IB_WR_REG_MR; - smbdirect_mr->cqe.done = register_mr_done; - reg_wr->wr.wr_cqe = &smbdirect_mr->cqe; + mr->cqe.done = register_mr_done; + reg_wr->wr.wr_cqe = &mr->cqe; reg_wr->wr.num_sge = 0; reg_wr->wr.send_flags = IB_SEND_SIGNALED; - reg_wr->mr = smbdirect_mr->mr; - reg_wr->key = smbdirect_mr->mr->rkey; + reg_wr->mr = mr->mr; + reg_wr->key = mr->mr->rkey; reg_wr->access = writing ? 
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; @@ -2584,18 +2579,17 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, */ rc = ib_post_send(sc->ib.qp, ®_wr->wr, NULL); if (!rc) - return smbdirect_mr; + return mr; log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n", rc, reg_wr->key); /* If all failed, attempt to recover this MR by setting it SMBDIRECT_MR_ERROR*/ map_mr_error: - ib_dma_unmap_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, - smbdirect_mr->sgt.nents, smbdirect_mr->dir); + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); dma_map_error: - smbdirect_mr->state = SMBDIRECT_MR_ERROR; + mr->state = SMBDIRECT_MR_ERROR; if (atomic_dec_and_test(&sc->mr_io.used.count)) wake_up(&sc->mr_io.cleanup.wait_queue); -- cgit v1.2.3 From 56c817e31acedc8a9041b181a15755e5a7b55f2b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:27 +0200 Subject: smb: client: improve logic in smbd_deregister_mr() - use 'mr' as variable name - style fixes This will make further changes easier. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index a863b6fff87a..af0642e94d7e 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2619,44 +2619,41 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) * and we have to locally invalidate the buffer to prevent data is being * modified by remote peer after upper layer consumes it */ -void smbd_deregister_mr(struct smbdirect_mr_io *smbdirect_mr) +void smbd_deregister_mr(struct smbdirect_mr_io *mr) { - struct ib_send_wr *wr; - struct smbdirect_socket *sc = smbdirect_mr->socket; - int rc = 0; + struct smbdirect_socket *sc = mr->socket; + + if (mr->need_invalidate) { + struct ib_send_wr *wr = &mr->inv_wr; + int rc; - if (smbdirect_mr->need_invalidate) { /* Need to finish local invalidation before returning */ - wr = &smbdirect_mr->inv_wr; wr->opcode = IB_WR_LOCAL_INV; - smbdirect_mr->cqe.done = local_inv_done; - wr->wr_cqe = &smbdirect_mr->cqe; + mr->cqe.done = local_inv_done; + wr->wr_cqe = &mr->cqe; wr->num_sge = 0; - wr->ex.invalidate_rkey = smbdirect_mr->mr->rkey; + wr->ex.invalidate_rkey = mr->mr->rkey; wr->send_flags = IB_SEND_SIGNALED; - init_completion(&smbdirect_mr->invalidate_done); + init_completion(&mr->invalidate_done); rc = ib_post_send(sc->ib.qp, wr, NULL); if (rc) { log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); smbd_disconnect_rdma_connection(sc); goto done; } - wait_for_completion(&smbdirect_mr->invalidate_done); - smbdirect_mr->need_invalidate = false; + wait_for_completion(&mr->invalidate_done); + mr->need_invalidate = false; } else /* * For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED * and defer to mr_recovery_work to recover the MR for next use */ - smbdirect_mr->state = SMBDIRECT_MR_INVALIDATED; + mr->state = SMBDIRECT_MR_INVALIDATED; - if (smbdirect_mr->state == SMBDIRECT_MR_INVALIDATED) { - ib_dma_unmap_sg( - sc->ib.dev, smbdirect_mr->sgt.sgl, - smbdirect_mr->sgt.nents, - smbdirect_mr->dir); - smbdirect_mr->state = SMBDIRECT_MR_READY; + if (mr->state == SMBDIRECT_MR_INVALIDATED) { + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + mr->state = SMBDIRECT_MR_READY; if 
(atomic_inc_return(&sc->mr_io.ready.count) == 1) wake_up(&sc->mr_io.ready.wait_queue); } else -- cgit v1.2.3 From b9c0becc2fceb53e4a958575344e92c0e4f812bb Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:28 +0200 Subject: smb: client: call ib_dma_unmap_sg if mr->sgt.nents is not 0 This seems to be the more reliable way to check if we need to call ib_dma_unmap_sg(). Fixes: c7398583340a ("CIFS: SMBD: Implement RDMA memory registration") Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index af0642e94d7e..21dcd326af3d 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2365,9 +2365,8 @@ static void destroy_mr_list(struct smbdirect_socket *sc) spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); list_for_each_entry_safe(mr, tmp, &all_list, list) { - if (mr->state == SMBDIRECT_MR_INVALIDATED) - ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, - mr->sgt.nents, mr->dir); + if (mr->sgt.nents) + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); ib_dereg_mr(mr->mr); kfree(mr->sgt.sgl); list_del(&mr->list); @@ -2589,6 +2588,7 @@ map_mr_error: ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); dma_map_error: + mr->sgt.nents = 0; mr->state = SMBDIRECT_MR_ERROR; if (atomic_dec_and_test(&sc->mr_io.used.count)) wake_up(&sc->mr_io.cleanup.wait_queue); @@ -2651,8 +2651,12 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr) */ mr->state = SMBDIRECT_MR_INVALIDATED; - if (mr->state == SMBDIRECT_MR_INVALIDATED) { + if (mr->sgt.nents) { ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + mr->sgt.nents = 0; + } + + if (mr->state == SMBDIRECT_MR_INVALIDATED) { mr->state = SMBDIRECT_MR_READY; if (atomic_inc_return(&sc->mr_io.ready.count) == 1) wake_up(&sc->mr_io.ready.wait_queue); -- cgit v1.2.3 From 1ef0e16c3d7ca07432987840d8eef1a9ffb67dec Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:29 +0200 Subject: smb: client: let destroy_mr_list() call ib_dereg_mr() before ib_dma_unmap_sg() This is more consistent as we call ib_dma_unmap_sg() only when the memory is no longer registered. This is the same pattern as calling ib_dma_unmap_sg() after IB_WR_LOCAL_INV. 
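The ordering invariant is easiest to see when teardown is written as the mirror image of setup: map, then register; deregister, then unmap. Below is a small runnable model of that rule with stand-in types instead of the RDMA objects (ib_dereg_mr()/ib_dma_unmap_sg() in the real code):

    #include <stdio.h>

    /* Minimal stand-ins for the RDMA objects; illustrative only. */
    struct mr_ctx { int registered; int mapped; };

    static void deregister_mr(struct mr_ctx *mr)
    {
            printf("deregister MR (device loses access to the pages)\n");
            mr->registered = 0;
    }

    static void dma_unmap_pages(struct mr_ctx *mr)
    {
            printf("dma-unmap pages (hand them back to the CPU domain)\n");
            mr->mapped = 0;
    }

    /* Teardown mirrors setup in reverse: deregister first, unmap second. */
    static void mr_teardown(struct mr_ctx *mr)
    {
            if (mr->registered)
                    deregister_mr(mr);
            if (mr->mapped)
                    dma_unmap_pages(mr);
    }

    int main(void)
    {
            struct mr_ctx mr = { .registered = 1, .mapped = 1 };

            mr_teardown(&mr);
            return 0;
    }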
Fixes: c7398583340a ("CIFS: SMBD: Implement RDMA memory registration") Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 21dcd326af3d..c3330e43488f 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2365,9 +2365,10 @@ static void destroy_mr_list(struct smbdirect_socket *sc) spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); list_for_each_entry_safe(mr, tmp, &all_list, list) { + if (mr->mr) + ib_dereg_mr(mr->mr); if (mr->sgt.nents) ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); - ib_dereg_mr(mr->mr); kfree(mr->sgt.sgl); list_del(&mr->list); kfree(mr); -- cgit v1.2.3 From c35dd838666d47de2848639234ec32e3ba22b49f Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Fri, 10 Oct 2025 23:17:07 +0530 Subject: KVM: arm64: Guard PMSCR_EL1 initialization with SPE presence check Commit efad60e46057 ("KVM: arm64: Initialize PMSCR_EL1 when in VHE") does not perform a sufficient check before initializing PMSCR_EL1 to 0 when running in VHE mode. On some platforms, this causes the system to hang during boot, as EL3 has not delegated access to the Profiling Buffer to the Non-secure world, nor does it reinject an UNDEF on sysreg trap. To avoid this issue, restrict the PMSCR_EL1 initialization to CPUs that support the Statistical Profiling Extension (FEAT_SPE) and have the Profiling Buffer accessible in Non-secure EL1. This is determined via a new helper `cpu_has_spe()` which checks both PMSVer and PMBIDR_EL1.P. This ensures the initialization only affects CPUs where SPE is implemented and usable, preventing boot failures on platforms where SPE is not properly configured. Fixes: efad60e46057 ("KVM: arm64: Initialize PMSCR_EL1 when in VHE") Signed-off-by: Mukesh Ojha Signed-off-by: Marc Zyngier --- arch/arm64/kvm/debug.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 3515a273eaa2..3ad6b7c6e4ba 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -15,6 +15,12 @@ #include #include +static int cpu_has_spe(u64 dfr0) +{ + return cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) && + !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P); +} + /** * kvm_arm_setup_mdcr_el2 - configure vcpu mdcr_el2 value * @@ -77,13 +83,12 @@ void kvm_init_host_debug_data(void) *host_data_ptr(debug_brps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr0); *host_data_ptr(debug_wrps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr0); + if (cpu_has_spe(dfr0)) + host_data_set_flag(HAS_SPE); + if (has_vhe()) return; - if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) && - !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P)) - host_data_set_flag(HAS_SPE); - /* Check if we have BRBE implemented and available at the host */ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT)) host_data_set_flag(HAS_BRBE); @@ -102,7 +107,7 @@ void kvm_init_host_debug_data(void) void kvm_debug_init_vhe(void) { /* Clear PMSCR_EL1.E{0,1}SPE which reset to UNKNOWN values. 
*/ - if (SYS_FIELD_GET(ID_AA64DFR0_EL1, PMSVer, read_sysreg(id_aa64dfr0_el1))) + if (host_data_test_flag(HAS_SPE)) write_sysreg_el1(0, SYS_PMSCR); } -- cgit v1.2.3 From 2192d348c0aa0cc2e7249dc3709f21bfe0a0170c Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 8 Oct 2025 23:45:20 +0800 Subject: KVM: arm64: selftests: Allocate vcpus with correct size vcpus array contains pointers to struct kvm_vcpu {}. It is way overkill to allocate the array with (nr_cpus * sizeof(struct kvm_vcpu)). Fix the allocation by using the correct size. Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c index 87922a89b134..b134a304f0a6 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c @@ -331,7 +331,7 @@ static void setup_vm(void) { int i; - vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu)); + vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *)); TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus); -- cgit v1.2.3 From d5e6310a0d996493b1af9f3eeec418350523388b Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 7 Oct 2025 12:52:55 -0700 Subject: KVM: arm64: selftests: Actually enable IRQs in vgic_lpi_stress vgic_lpi_stress rather hilariously leaves IRQs disabled for the duration of the test. While the ITS translation of MSIs happens regardless of this, for completeness the guest should actually handle the LPIs. Signed-off-by: Oliver Upton Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c index b134a304f0a6..687d04463983 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c @@ -123,6 +123,7 @@ static void guest_setup_gic(void) static void guest_code(size_t nr_lpis) { guest_setup_gic(); + local_irq_enable(); GUEST_SYNC(0); -- cgit v1.2.3 From 3193287ddffbce29fd1a79d812f543c0fe4861d1 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Tue, 7 Oct 2025 16:07:13 +0000 Subject: KVM: arm64: gic-v3: Only set ICH_HCR traps for v2-on-v3 or v3 guests The ICH_HCR_EL2 traps are used when running on GICv3 hardware, or when running a GICv3-based guest using FEAT_GCIE_LEGACY on GICv5 hardware. When running a GICv2 guest on GICv3 hardware the traps are used to ensure that the guest never sees any part of GICv3 (only GICv2 is visible to the guest), and when running a GICv3 guest they are used to trap in specific scenarios. They are not applicable for a GICv2-native guest, and won't be applicable for a(n upcoming) GICv5 guest. The traps themselves are configured in the vGIC CPU IF state, which is stored as a union. Updating the wrong aperture of the union risks corrupting state, and therefore needs to be avoided at all costs. Bail early if we're not running a compatible guest (GICv2 on GICv3 hardware, GICv3 native, GICv3 on GICv5 hardware). Trap everything unconditionally if we're running a GICv2 guest on GICv3 hardware. Otherwise, conditionally set up GICv3-native trapping. 
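The hazard called out above is the classic tagged-union one: writing through the wrong member silently corrupts state owned by another interrupt-controller model. Below is a minimal sketch of the check-the-discriminant-first pattern the patch adopts; the types, fields and the all-ones trap mask are invented for illustration, not the vGIC data structures:

    #include <stdint.h>

    enum gic_model { GIC_V2, GIC_V3, GIC_V5_NATIVE };

    struct cpu_if {
            enum gic_model model;                   /* discriminant */
            union {
                    struct { uint64_t hcr; } v3;    /* GICv3 (and v2-on-v3) state */
                    struct { uint64_t cfg; } v5;    /* GICv5-native state */
            };
    };

    static void set_v3_traps(struct cpu_if *cif, uint64_t traps)
    {
            /* Bail early: never touch the v3 aperture of a v5-native guest. */
            if (cif->model == GIC_V5_NATIVE)
                    return;

            if (cif->model == GIC_V2) {
                    /* Trap everything so the guest never sees GICv3. */
                    cif->v3.hcr = ~(uint64_t)0;
                    return;
            }

            cif->v3.hcr = traps;    /* GICv3 guest: conditional trapping */
    }

    int main(void)
    {
            struct cpu_if cif = { .model = GIC_V2 };

            set_v3_traps(&cif, 0);
            return 0;
    }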
Signed-off-by: Sascha Bischoff Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-v3.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index f1c153106c56..6fbb4b099855 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -297,8 +297,11 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; + if (!vgic_is_v3(vcpu->kvm)) + return; + /* Hide GICv3 sysreg if necessary */ - if (!kvm_has_gicv3(vcpu->kvm)) { + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | ICH_HCR_EL2_TC); return; -- cgit v1.2.3 From 164ecbf73c3ea61455e07eefdad8050a7b569558 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Tue, 7 Oct 2025 15:48:54 +0000 Subject: Documentation: KVM: Update GICv3 docs for GICv5 hosts GICv5 hosts optionally include FEAT_GCIE_LEGACY, which allows them to execute GICv3-based VMs on GICv5 hardware. Update the GICv3 documentation to reflect this now that GICv3 guests are supported on compatible GICv5 hosts. Signed-off-by: Sascha Bischoff Signed-off-by: Marc Zyngier --- Documentation/virt/kvm/devices/arm-vgic-v3.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/devices/arm-vgic-v3.rst b/Documentation/virt/kvm/devices/arm-vgic-v3.rst index ff02102f7141..5395ee66fc32 100644 --- a/Documentation/virt/kvm/devices/arm-vgic-v3.rst +++ b/Documentation/virt/kvm/devices/arm-vgic-v3.rst @@ -13,7 +13,8 @@ will act as the VM interrupt controller, requiring emulated user-space devices to inject interrupts to the VGIC instead of directly to CPUs. It is not possible to create both a GICv3 and GICv2 on the same VM. -Creating a guest GICv3 device requires a host GICv3 as well. +Creating a guest GICv3 device requires a GICv3 host, or a GICv5 host with +support for FEAT_GCIE_LEGACY. Groups: -- cgit v1.2.3 From 4cab5c857d1f92b4b322e30349fdc5e2e38e7a2f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:45 +0100 Subject: KVM: arm64: Hide CNTHV_*_EL2 from userspace for nVHE guests Although we correctly UNDEF any CNTHV_*_EL2 access from the guest when E2H==0, we still expose these registers to userspace, which is a bad idea. Drop the ad-hoc UNDEF injection and switch to a .visibility() callback which will also hide the register from userspace. 
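The benefit of a .visibility() callback over trap-time UNDEF injection is that a single predicate feeds both paths: guest accesses and the userspace register enumeration. A toy model of the idea, with hypothetical names rather than the sys_regs.c machinery:

    #include <stdbool.h>
    #include <stdio.h>

    struct vcpu { bool nested; bool e2h0; };

    /* One predicate decides existence for guest *and* userspace. */
    static bool cnthv_visible(const struct vcpu *v)
    {
            /* CNTHV_* only exists for a nested guest without E2H==0. */
            return v->nested && !v->e2h0;
    }

    static void guest_access(const struct vcpu *v)
    {
            if (!cnthv_visible(v)) {
                    puts("inject UNDEF");
                    return;
            }
            puts("emulate timer access");
    }

    static void enumerate_for_userspace(const struct vcpu *v)
    {
            /* Hidden registers never show up in KVM_GET_REG_LIST. */
            if (cnthv_visible(v))
                    puts("report CNTHV_CTL_EL2 to userspace");
    }

    int main(void)
    {
            struct vcpu v = { .nested = true, .e2h0 = true };

            guest_access(&v);               /* prints "inject UNDEF" */
            enumerate_for_userspace(&v);    /* reports nothing */
            return 0;
    }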
Fixes: 0e45981028550 ("KVM: arm64: timer: Don't adjust the EL2 virtual timer offset") Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index ee8a7033c85b..9f2f4e0b042e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1594,16 +1594,6 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } -static bool access_hv_timer(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - if (!vcpu_el2_e2h_is_set(vcpu)) - return undef_access(vcpu, p, r); - - return access_arch_timer(vcpu, p, r); -} - static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, s64 new, s64 cur) { @@ -2831,6 +2821,16 @@ static unsigned int s1pie_el2_visibility(const struct kvm_vcpu *vcpu, return __el2_visibility(vcpu, rd, s1pie_visibility); } +static unsigned int cnthv_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (vcpu_has_nv(vcpu) && + !vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2_E2H0)) + return 0; + + return REG_HIDDEN; +} + static bool access_mdcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -3691,9 +3691,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0), EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0), - { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer }, - EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0), - EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, reset_val, 0), + { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer, .visibility = cnthv_visibility }, + EL2_REG_FILTERED(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0, cnthv_visibility), + EL2_REG_FILTERED(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0, cnthv_visibility), { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, -- cgit v1.2.3 From aa68975c973ed3b0bd4ff513113495588afb855c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:46 +0100 Subject: KVM: arm64: Introduce timer_context_to_vcpu() helper We currently have a vcpu pointer nested into each timer context. As we are about to remove this pointer, introduce a helper (aptly named timer_context_to_vcpu()) that returns this pointer, at least until we repaint the data structure. 
Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 25 +++++++++++++------------ include/kvm/arm_arch_timer.h | 2 +- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index dbd74e4885e2..e5a25e743f5b 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -66,7 +66,7 @@ static int nr_timers(struct kvm_vcpu *vcpu) u32 timer_get_ctl(struct arch_timer_context *ctxt) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -85,7 +85,7 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt) u64 timer_get_cval(struct arch_timer_context *ctxt) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -104,7 +104,7 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -126,7 +126,7 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -343,7 +343,7 @@ static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt) u64 ns; ctx = container_of(hrt, struct arch_timer_context, hrtimer); - vcpu = ctx->vcpu; + vcpu = timer_context_to_vcpu(ctx); trace_kvm_timer_hrtimer_expire(ctx); @@ -436,8 +436,9 @@ static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level) * * But hey, it's fast, right? 
*/ - if (is_hyp_ctxt(ctx->vcpu) && - (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) { + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx); + if (is_hyp_ctxt(vcpu) && + (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) { unsigned long val = timer_get_ctl(ctx); __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level); timer_set_ctl(ctx, val); @@ -470,7 +471,7 @@ static void timer_emulate(struct arch_timer_context *ctx) trace_kvm_timer_emulate(ctx, should_fire); if (should_fire != ctx->irq.level) - kvm_timer_update_irq(ctx->vcpu, should_fire, ctx); + kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx); kvm_timer_update_status(ctx, should_fire); @@ -498,7 +499,7 @@ static void set_cntpoff(u64 cntpoff) static void timer_save_state(struct arch_timer_context *ctx) { - struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx)); enum kvm_arch_timers index = arch_timer_ctx_index(ctx); unsigned long flags; @@ -609,7 +610,7 @@ static void kvm_timer_unblocking(struct kvm_vcpu *vcpu) static void timer_restore_state(struct arch_timer_context *ctx) { - struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx)); enum kvm_arch_timers index = arch_timer_ctx_index(ctx); unsigned long flags; @@ -668,7 +669,7 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) { - struct kvm_vcpu *vcpu = ctx->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx); bool phys_active = false; /* @@ -677,7 +678,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) * this point and the register restoration, we'll take the * interrupt anyway. */ - kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); + kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx); if (irqchip_in_kernel(vcpu->kvm)) phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx)); diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 681cf0c8b9df..d188c716d03c 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -128,7 +128,7 @@ void kvm_timer_init_vhe(void); #define vcpu_hptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HPTIMER]) #define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) - +#define timer_context_to_vcpu(ctx) ((ctx)->vcpu) #define timer_vm_data(ctx) (&(ctx)->vcpu->kvm->arch.timer_data) #define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)]) -- cgit v1.2.3 From 8625a670afb05f1e1d69d50a74dbcc9d1b855efe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:47 +0100 Subject: KVM: arm64: Replace timer context vcpu pointer with timer_id Having to follow a pointer to a vcpu is pretty dumb, when the timers are at a fixed offset in the vcpu structure itself. Trade the vcpu pointer for a timer_id, which can then be used to compute the vcpu address as needed. 
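The arithmetic that replaces the back-pointer is worth seeing in isolation: an element that knows its own index can recover its parent structure with plain pointer math. A standalone demonstration with generic types; the kernel spells the same computation as container_of() with an indexed member, as the hunk below shows:

    #include <stddef.h>
    #include <stdio.h>

    struct timer { int id; };       /* element stores its own index */
    struct vcpu  { int tag; struct timer timers[4]; };

    /*
     * Step back over the preceding array elements, then over the fields
     * that precede the array: no back-pointer needed.
     */
    static struct vcpu *timer_to_vcpu(struct timer *t)
    {
            return (struct vcpu *)((char *)(t - t->id) -
                                   offsetof(struct vcpu, timers));
    }

    int main(void)
    {
            struct vcpu v = { .tag = 42 };

            for (int i = 0; i < 4; i++)
                    v.timers[i].id = i;

            printf("%d\n", timer_to_vcpu(&v.timers[2])->tag);       /* 42 */
            return 0;
    }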
Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 4 ++-- include/kvm/arm_arch_timer.h | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index e5a25e743f5b..c832c293676a 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -149,7 +149,7 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) { if (!ctxt->offset.vm_offset) { - WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt)); + WARN(offset, "timer %d\n", arch_timer_ctx_index(ctxt)); return; } @@ -1064,7 +1064,7 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid); struct kvm *kvm = vcpu->kvm; - ctxt->vcpu = vcpu; + ctxt->timer_id = timerid; if (timerid == TIMER_VTIMER) ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index d188c716d03c..d8e400cb2bff 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -51,8 +51,6 @@ struct arch_timer_vm_data { }; struct arch_timer_context { - struct kvm_vcpu *vcpu; - /* Emulated Timer (may be unused) */ struct hrtimer hrtimer; u64 ns_frac; @@ -71,6 +69,9 @@ struct arch_timer_context { bool level; } irq; + /* Who am I? */ + enum kvm_arch_timers timer_id; + /* Duplicated state from arch_timer.c for convenience */ u32 host_timer_irq; }; @@ -127,9 +128,9 @@ void kvm_timer_init_vhe(void); #define vcpu_hvtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HVTIMER]) #define vcpu_hptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HPTIMER]) -#define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) -#define timer_context_to_vcpu(ctx) ((ctx)->vcpu) -#define timer_vm_data(ctx) (&(ctx)->vcpu->kvm->arch.timer_data) +#define arch_timer_ctx_index(ctx) ((ctx)->timer_id) +#define timer_context_to_vcpu(ctx) container_of((ctx), struct kvm_vcpu, arch.timer_cpu.timers[(ctx)->timer_id]) +#define timer_vm_data(ctx) (&(timer_context_to_vcpu(ctx)->kvm->arch.timer_data)) #define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)]) u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From a92d552266890f83126fdef4f777a985cc1302bd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:48 +0100 Subject: KVM: arm64: Make timer_set_offset() generally accessible Move the timer_set_offset() helper to arm_arch_timer.h, so that it is next to timer_get_offset(), and accessible by the rest of KVM. 
Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 10 ---------- include/kvm/arm_arch_timer.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index c832c293676a..27662a3a3043 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -146,16 +146,6 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) } } -static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) -{ - if (!ctxt->offset.vm_offset) { - WARN(offset, "timer %d\n", arch_timer_ctx_index(ctxt)); - return; - } - - WRITE_ONCE(*ctxt->offset.vm_offset, offset); -} - u64 kvm_phys_timer_read(void) { return timecounter->cc->read(timecounter->cc); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index d8e400cb2bff..5f7f2ed8817c 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -179,4 +179,14 @@ static inline u64 timer_get_offset(struct arch_timer_context *ctxt) return offset; } +static inline void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) +{ + if (!ctxt->offset.vm_offset) { + WARN(offset, "timer %d\n", arch_timer_ctx_index(ctxt)); + return; + } + + WRITE_ONCE(*ctxt->offset.vm_offset, offset); +} + #endif -- cgit v1.2.3 From 77a0c42eaf03c66936429d190bb2ea1a214bd528 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:49 +0100 Subject: KVM: arm64: Add timer UAPI workaround to sysreg infrastructure Amongst the numerous bugs that plague the KVM/arm64 UAPI, one of the most annoying things is that the userspace view of the virtual timer has its CVAL and CNT encodings swapped. In order to reduce the amount of code that has to know about this, start by adding handling for this bug in the sys_reg code. Nothing is making use of it yet, as the code responsible for userspace interaction is catching the accesses early. 
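Seen from the VMM, the swap stays invisible as long as the legacy constants are used. A hedged sketch of the userspace side (arm64 only; error handling minimal; assumes vcpu_fd was obtained via the usual KVM_CREATE_VM/KVM_CREATE_VCPU sequence):

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /*
     * Read the guest's virtual counter. KVM_REG_ARM_TIMER_CNT is the
     * historically swapped UAPI index; userspace must keep using these
     * constants, never an index derived from the CNTVCT_EL0 encoding.
     */
    static uint64_t read_virtual_counter(int vcpu_fd)
    {
            uint64_t val = 0;
            struct kvm_one_reg reg = {
                    .id   = KVM_REG_ARM_TIMER_CNT,
                    .addr = (uint64_t)(unsigned long)&val,
            };

            if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg))
                    perror("KVM_GET_ONE_REG");

            return val;
    }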
Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 33 ++++++++++++++++++++++++++++++--- arch/arm64/kvm/sys_regs.h | 6 ++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 9f2f4e0b042e..8e6f50f54b4b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -5231,15 +5231,28 @@ static int demux_c15_set(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) } } +static u64 kvm_one_reg_to_id(const struct kvm_one_reg *reg) +{ + switch(reg->id) { + case KVM_REG_ARM_TIMER_CVAL: + return TO_ARM64_SYS_REG(CNTV_CVAL_EL0); + case KVM_REG_ARM_TIMER_CNT: + return TO_ARM64_SYS_REG(CNTVCT_EL0); + default: + return reg->id; + } +} + int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, const struct sys_reg_desc table[], unsigned int num) { u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 id = kvm_one_reg_to_id(reg); u64 val; int ret; - r = id_to_sys_reg_desc(vcpu, reg->id, table, num); + r = id_to_sys_reg_desc(vcpu, id, table, num); if (!r || sysreg_hidden(vcpu, r)) return -ENOENT; @@ -5272,13 +5285,14 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, { u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 id = kvm_one_reg_to_id(reg); u64 val; int ret; if (get_user(val, uaddr)) return -EFAULT; - r = id_to_sys_reg_desc(vcpu, reg->id, table, num); + r = id_to_sys_reg_desc(vcpu, id, table, num); if (!r || sysreg_hidden(vcpu, r)) return -ENOENT; @@ -5338,10 +5352,23 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg) static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) { + u64 idx; + if (!*uind) return true; - if (put_user(sys_reg_to_index(reg), *uind)) + switch (reg_to_encoding(reg)) { + case SYS_CNTV_CVAL_EL0: + idx = KVM_REG_ARM_TIMER_CVAL; + break; + case SYS_CNTVCT_EL0: + idx = KVM_REG_ARM_TIMER_CNT; + break; + default: + idx = sys_reg_to_index(reg); + } + + if (put_user(idx, *uind)) return false; (*uind)++; diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 317abc490368..b3f904472fac 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -257,4 +257,10 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu); (val); \ }) +#define TO_ARM64_SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \ + sys_reg_Op1(SYS_ ## r), \ + sys_reg_CRn(SYS_ ## r), \ + sys_reg_CRm(SYS_ ## r), \ + sys_reg_Op2(SYS_ ## r)) + #endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */ -- cgit v1.2.3 From 09424d5d7d4e8b427ee4a737fb7765103789e08a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:50 +0100 Subject: KVM: arm64: Move CNT*_CTL_EL0 userspace accessors to generic infrastructure Remove the handling of CNT*_CTL_EL0 from guest.c, and move it to sys_regs.c, using a new TIMER_REG() definition to encapsulate it. 
Signed-off-by: Marc Zyngier --- arch/arm64/kvm/guest.c | 4 ---- arch/arm64/kvm/sys_regs.c | 36 +++++++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 16ba5e9ac86c..dea648706fd5 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -592,10 +592,8 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) } static const u64 timer_reg_list[] = { - KVM_REG_ARM_TIMER_CTL, KVM_REG_ARM_TIMER_CNT, KVM_REG_ARM_TIMER_CVAL, - KVM_REG_ARM_PTIMER_CTL, KVM_REG_ARM_PTIMER_CNT, KVM_REG_ARM_PTIMER_CVAL, }; @@ -605,10 +603,8 @@ static const u64 timer_reg_list[] = { static bool is_timer_reg(u64 index) { switch (index) { - case KVM_REG_ARM_TIMER_CTL: case KVM_REG_ARM_TIMER_CNT: case KVM_REG_ARM_TIMER_CVAL: - case KVM_REG_ARM_PTIMER_CTL: case KVM_REG_ARM_PTIMER_CNT: case KVM_REG_ARM_PTIMER_CVAL: return true; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 8e6f50f54b4b..d97aacf4c1dc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1594,6 +1594,23 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } +static int arch_timer_set_user(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) +{ + switch (reg_to_encoding(rd)) { + case SYS_CNTV_CTL_EL0: + case SYS_CNTP_CTL_EL0: + case SYS_CNTHV_CTL_EL2: + case SYS_CNTHP_CTL_EL2: + val &= ~ARCH_TIMER_CTRL_IT_STAT; + break; + } + + __vcpu_assign_sys_reg(vcpu, rd->reg, val); + return 0; +} + static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, s64 new, s64 cur) { @@ -2496,15 +2513,20 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, "trap of EL2 register redirected to EL1"); } -#define EL2_REG_FILTERED(name, acc, rst, v, filter) { \ +#define SYS_REG_USER_FILTER(name, acc, rst, v, gu, su, filter) { \ SYS_DESC(SYS_##name), \ .access = acc, \ .reset = rst, \ .reg = name, \ + .get_user = gu, \ + .set_user = su, \ .visibility = filter, \ .val = v, \ } +#define EL2_REG_FILTERED(name, acc, rst, v, filter) \ + SYS_REG_USER_FILTER(name, acc, rst, v, NULL, NULL, filter) + #define EL2_REG(name, acc, rst, v) \ EL2_REG_FILTERED(name, acc, rst, v, el2_visibility) @@ -2515,6 +2537,10 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, EL2_REG_VNCR_FILT(name, hidden_visibility) #define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v) +#define TIMER_REG(name, vis) \ + SYS_REG_USER_FILTER(name, access_arch_timer, reset_val, 0, \ + NULL, arch_timer_set_user, vis) + /* * Since reset() callback and field val are not used for idregs, they will be * used for specific purposes for idregs. 
@@ -3485,11 +3511,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, + TIMER_REG(CNTP_CTL_EL0, NULL), { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTV_TVAL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTV_CTL_EL0), access_arch_timer }, + TIMER_REG(CNTV_CTL_EL0, NULL), { SYS_DESC(SYS_CNTV_CVAL_EL0), access_arch_timer }, /* PMEVCNTRn_EL0 */ @@ -3688,11 +3714,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0), EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), { SYS_DESC(SYS_CNTHP_TVAL_EL2), access_arch_timer }, - EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0), + TIMER_REG(CNTHP_CTL_EL2, el2_visibility), EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0), { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer, .visibility = cnthv_visibility }, - EL2_REG_FILTERED(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0, cnthv_visibility), + TIMER_REG(CNTHV_CTL_EL2, cnthv_visibility), EL2_REG_FILTERED(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0, cnthv_visibility), { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, -- cgit v1.2.3 From 8af198980eff2ed2a5df3d2ee39f8c9d61f40559 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:51 +0100 Subject: KVM: arm64: Move CNT*_CVAL_EL0 userspace accessors to generic infrastructure As for the control registers, move the comparator registers to the common infrastructure. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/guest.c | 4 ---- arch/arm64/kvm/sys_regs.c | 8 ++++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dea648706fd5..c23ec9be4ce2 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -593,9 +593,7 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) static const u64 timer_reg_list[] = { KVM_REG_ARM_TIMER_CNT, - KVM_REG_ARM_TIMER_CVAL, KVM_REG_ARM_PTIMER_CNT, - KVM_REG_ARM_PTIMER_CVAL, }; #define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) @@ -604,9 +602,7 @@ static bool is_timer_reg(u64 index) { switch (index) { case KVM_REG_ARM_TIMER_CNT: - case KVM_REG_ARM_TIMER_CVAL: case KVM_REG_ARM_PTIMER_CNT: - case KVM_REG_ARM_PTIMER_CVAL: return true; } return false; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d97aacf4c1dc..68e88d5c0dfb 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3512,11 +3512,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, TIMER_REG(CNTP_CTL_EL0, NULL), - { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, + TIMER_REG(CNTP_CVAL_EL0, NULL), { SYS_DESC(SYS_CNTV_TVAL_EL0), access_arch_timer }, TIMER_REG(CNTV_CTL_EL0, NULL), - { SYS_DESC(SYS_CNTV_CVAL_EL0), access_arch_timer }, + TIMER_REG(CNTV_CVAL_EL0, NULL), /* PMEVCNTRn_EL0 */ PMU_PMEVCNTR_EL0(0), @@ -3715,11 +3715,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), { SYS_DESC(SYS_CNTHP_TVAL_EL2), access_arch_timer }, TIMER_REG(CNTHP_CTL_EL2, el2_visibility), - EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0), + TIMER_REG(CNTHP_CVAL_EL2, el2_visibility), { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer, .visibility = cnthv_visibility }, 
TIMER_REG(CNTHV_CTL_EL2, cnthv_visibility), - EL2_REG_FILTERED(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0, cnthv_visibility), + TIMER_REG(CNTHV_CVAL_EL2, cnthv_visibility), { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, -- cgit v1.2.3 From c3be3a48fb18f9d243fac452e0be41469bb246b4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:52 +0100 Subject: KVM: arm64: Move CNT*CT_EL0 userspace accessors to generic infrastructure Moving the counter registers is a bit more involved than for the control and comparator (there is no shadow data for the counter), but still pretty manageable. Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier --- arch/arm64/kvm/guest.c | 7 ------- arch/arm64/kvm/sys_regs.c | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index c23ec9be4ce2..138e5e2dc10c 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -592,19 +592,12 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) } static const u64 timer_reg_list[] = { - KVM_REG_ARM_TIMER_CNT, - KVM_REG_ARM_PTIMER_CNT, }; #define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) static bool is_timer_reg(u64 index) { - switch (index) { - case KVM_REG_ARM_TIMER_CNT: - case KVM_REG_ARM_PTIMER_CNT: - return true; - } return false; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 68e88d5c0dfb..e67eb39ddc11 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1605,12 +1605,38 @@ static int arch_timer_set_user(struct kvm_vcpu *vcpu, case SYS_CNTHP_CTL_EL2: val &= ~ARCH_TIMER_CTRL_IT_STAT; break; + case SYS_CNTVCT_EL0: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) + timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read() - val); + return 0; + case SYS_CNTPCT_EL0: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) + timer_set_offset(vcpu_ptimer(vcpu), kvm_phys_timer_read() - val); + return 0; } __vcpu_assign_sys_reg(vcpu, rd->reg, val); return 0; } +static int arch_timer_get_user(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 *val) +{ + switch (reg_to_encoding(rd)) { + case SYS_CNTVCT_EL0: + *val = kvm_phys_timer_read() - timer_get_offset(vcpu_vtimer(vcpu)); + break; + case SYS_CNTPCT_EL0: + *val = kvm_phys_timer_read() - timer_get_offset(vcpu_ptimer(vcpu)); + break; + default: + *val = __vcpu_sys_reg(vcpu, rd->reg); + } + + return 0; +} + static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, s64 new, s64 cur) { @@ -2539,7 +2565,7 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, #define TIMER_REG(name, vis) \ SYS_REG_USER_FILTER(name, access_arch_timer, reset_val, 0, \ - NULL, arch_timer_set_user, vis) + arch_timer_get_user, arch_timer_set_user, vis) /* * Since reset() callback and field val are not used for idregs, they will be @@ -3506,8 +3532,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { AMU_AMEVTYPER1_EL0(14), AMU_AMEVTYPER1_EL0(15), - { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTVCT_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTPCT_EL0), .access = access_arch_timer, + .get_user = arch_timer_get_user, .set_user = arch_timer_set_user }, + { SYS_DESC(SYS_CNTVCT_EL0), .access = access_arch_timer, + .get_user = arch_timer_get_user, .set_user = arch_timer_set_user }, { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_TVAL_EL0), 
access_arch_timer }, -- cgit v1.2.3 From 892f7c38ba3b7de19b3dffb8e148d5fbf1228f20 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:53 +0100 Subject: KVM: arm64: Fix WFxT handling of nested virt The spec for WFxT indicates that the parameter to the WFxT instruction is relative to the reading of CNTVCT_EL0. This means that the implementation needs to take the execution context into account, as CNTVOFF_EL2 does not always affect readings of CNTVCT_EL0 (such as when HCR_EL2.E2H is 1 and that we're in host context). This also rids us of the last instance of KVM_REG_ARM_TIMER_CNT outside of the userspace interaction code. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/handle_exit.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index bca8c80e11da..cc7d5d1709cb 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -147,7 +147,12 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) if (esr & ESR_ELx_WFx_ISS_RV) { u64 val, now; - now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT); + now = kvm_phys_timer_read(); + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) + now -= timer_get_offset(vcpu_hvtimer(vcpu)); + else + now -= timer_get_offset(vcpu_vtimer(vcpu)); + val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); if (now >= val) -- cgit v1.2.3 From 386aac77da112651a5cdadc4a6b29181592f5aa0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:54 +0100 Subject: KVM: arm64: Kill leftovers of ad-hoc timer userspace access Now that the whole timer infrastructure is handled as system register accesses, get rid of the now unused ad-hoc infrastructure. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 68 -------------------------------------------- arch/arm64/kvm/guest.c | 55 ----------------------------------- include/kvm/arm_arch_timer.h | 3 -- 3 files changed, 126 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 27662a3a3043..3f675875abea 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1112,49 +1112,6 @@ void kvm_timer_cpu_down(void) disable_percpu_irq(host_ptimer_irq); } -int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) -{ - struct arch_timer_context *timer; - - switch (regid) { - case KVM_REG_ARM_TIMER_CTL: - timer = vcpu_vtimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); - break; - case KVM_REG_ARM_TIMER_CNT: - if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, - &vcpu->kvm->arch.flags)) { - timer = vcpu_vtimer(vcpu); - timer_set_offset(timer, kvm_phys_timer_read() - value); - } - break; - case KVM_REG_ARM_TIMER_CVAL: - timer = vcpu_vtimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); - break; - case KVM_REG_ARM_PTIMER_CTL: - timer = vcpu_ptimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); - break; - case KVM_REG_ARM_PTIMER_CNT: - if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, - &vcpu->kvm->arch.flags)) { - timer = vcpu_ptimer(vcpu); - timer_set_offset(timer, kvm_phys_timer_read() - value); - } - break; - case KVM_REG_ARM_PTIMER_CVAL: - timer = vcpu_ptimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); - break; - - default: - return -1; - } - - return 0; -} - static u64 read_timer_ctl(struct arch_timer_context *timer) { /* @@ -1171,31 +1128,6 @@ static u64 read_timer_ctl(struct arch_timer_context *timer) return ctl; } -u64 kvm_arm_timer_get_reg(struct kvm_vcpu 
*vcpu, u64 regid) -{ - switch (regid) { - case KVM_REG_ARM_TIMER_CTL: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CTL); - case KVM_REG_ARM_TIMER_CNT: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CNT); - case KVM_REG_ARM_TIMER_CVAL: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CVAL); - case KVM_REG_ARM_PTIMER_CTL: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CTL); - case KVM_REG_ARM_PTIMER_CNT: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CNT); - case KVM_REG_ARM_PTIMER_CVAL: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CVAL); - } - return (u64)-1; -} - static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 138e5e2dc10c..1c87699fd886 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -591,49 +591,6 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) return copy_core_reg_indices(vcpu, NULL); } -static const u64 timer_reg_list[] = { -}; - -#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) - -static bool is_timer_reg(u64 index) -{ - return false; -} - -static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - for (int i = 0; i < NUM_TIMER_REGS; i++) { - if (put_user(timer_reg_list[i], uindices)) - return -EFAULT; - uindices++; - } - - return 0; -} - -static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) -{ - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; - int ret; - - ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); - if (ret != 0) - return -EFAULT; - - return kvm_arm_timer_set_reg(vcpu, reg->id, val); -} - -static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) -{ - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; - - val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? 
-EFAULT : 0; -} - static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu) { const unsigned int slices = vcpu_sve_slices(vcpu); @@ -709,7 +666,6 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) res += num_sve_regs(vcpu); res += kvm_arm_num_sys_reg_descs(vcpu); res += kvm_arm_get_fw_num_regs(vcpu); - res += NUM_TIMER_REGS; return res; } @@ -740,11 +696,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return ret; uindices += kvm_arm_get_fw_num_regs(vcpu); - ret = copy_timer_indices(vcpu, uindices); - if (ret < 0) - return ret; - uindices += NUM_TIMER_REGS; - return kvm_arm_copy_sys_reg_indices(vcpu, uindices); } @@ -762,9 +713,6 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg); } - if (is_timer_reg(reg->id)) - return get_timer_reg(vcpu, reg); - return kvm_arm_sys_reg_get_reg(vcpu, reg); } @@ -782,9 +730,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM64_SVE: return set_sve_reg(vcpu, reg); } - if (is_timer_reg(reg->id)) - return set_timer_reg(vcpu, reg); - return kvm_arm_sys_reg_set_reg(vcpu, reg); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 5f7f2ed8817c..7310841f4512 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -107,9 +107,6 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); void kvm_timer_init_vm(struct kvm *kvm); -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); -int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); - int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -- cgit v1.2.3 From 6418330c8478735f625398bc4e96d3ac6ce1e055 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:55 +0100 Subject: KVM: arm64: selftests: Make dependencies on VHE-specific registers explicit The hyp virtual timer registers only exist when VHE is present, Similarly, VNCR_EL2 only exists when NV2 is present. Make these dependencies explicit. Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/get-reg-list.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index 011fad95dd02..0a4cfb368512 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -65,6 +65,9 @@ static struct feature_id_reg feat_id_regs[] = { REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP), REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP), REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP), + REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY), + REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP), + REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP), }; bool filter_reg(__u64 reg) -- cgit v1.2.3 From 4da5a9af78b74fb771a4d25dc794296d10e170b1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:56 +0100 Subject: KVM: arm64: selftests: Add an E2H=0-specific configuration to get_reg_list Add yet another configuration, this time dealing E2H=0. 
Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/get-reg-list.c | 79 ++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index 0a4cfb368512..7a238755f072 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -758,6 +758,10 @@ static __u64 el2_regs[] = { SYS_REG(VSESR_EL2), }; +static __u64 el2_e2h0_regs[] = { + /* Empty */ +}; + #define BASE_SUBLIST \ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } #define VREGS_SUBLIST \ @@ -792,6 +796,15 @@ static __u64 el2_regs[] = { .regs = el2_regs, \ .regs_n = ARRAY_SIZE(el2_regs), \ } +#define EL2_E2H0_SUBLIST \ + EL2_SUBLIST, \ + { \ + .name = "EL2 E2H0", \ + .capability = KVM_CAP_ARM_EL2_E2H0, \ + .feature = KVM_ARM_VCPU_HAS_EL2_E2H0, \ + .regs = el2_e2h0_regs, \ + .regs_n = ARRAY_SIZE(el2_e2h0_regs), \ + } static struct vcpu_reg_list vregs_config = { .sublists = { @@ -900,6 +913,65 @@ static struct vcpu_reg_list el2_pauth_pmu_config = { }, }; +static struct vcpu_reg_list el2_e2h0_vregs_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_sve_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_pauth_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + struct vcpu_reg_list *vcpu_configs[] = { &vregs_config, &vregs_pmu_config, @@ -914,5 +986,12 @@ struct vcpu_reg_list *vcpu_configs[] = { &el2_sve_pmu_config, &el2_pauth_config, &el2_pauth_pmu_config, + + &el2_e2h0_vregs_config, + &el2_e2h0_vregs_pmu_config, + &el2_e2h0_sve_config, + &el2_e2h0_sve_pmu_config, + &el2_e2h0_pauth_config, + &el2_e2h0_pauth_pmu_config, }; int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); -- cgit v1.2.3 From 5c7cf1e44e94a5408b1b5277810502b0f82b77fe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:57 +0100 Subject: KVM: arm64: selftests: Fix misleading comment about virtual timer encoding The userspace-visible encodings for CNTV_CVAL_EL0 and CNTVCT_EL0 have been swapped for as long as userspace has had access to the registers. This is documented in arch/arm64/include/uapi/asm/kvm.h. Despite that, the get_reg_list test has unhelpful comments indicating the wrong register for the encoding. Replace this with definitions exposed in the include file, and a comment explaining again the brokenness. 
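Spelled out, the swap looks as follows. The encodings are the architectural ones; the UAPI values are believed to match arch/arm64/include/uapi/asm/kvm.h, so verify against your headers before relying on them (the asserts below fail the build if this note is wrong):

    #include <linux/kvm.h>  /* on arm64, pulls in asm/kvm.h */

    /*
     * Architectural encodings (op0, op1, CRn, CRm, op2):
     *   CNTV_CTL_EL0  = (3, 3, 14, 3, 1)   <- KVM_REG_ARM_TIMER_CTL: correct
     *   CNTV_CVAL_EL0 = (3, 3, 14, 3, 2)   <- used by KVM_REG_ARM_TIMER_CNT
     *   CNTVCT_EL0    = (3, 3, 14, 0, 2)   <- used by KVM_REG_ARM_TIMER_CVAL
     */
    _Static_assert(KVM_REG_ARM_TIMER_CNT == ARM64_SYS_REG(3, 3, 14, 3, 2),
                   "TIMER_CNT carries CNTV_CVAL_EL0's encoding");
    _Static_assert(KVM_REG_ARM_TIMER_CVAL == ARM64_SYS_REG(3, 3, 14, 0, 2),
                   "TIMER_CVAL carries CNTVCT_EL0's encoding");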
Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/get-reg-list.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index 7a238755f072..c9b84eeaab6b 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -348,9 +348,20 @@ static __u64 base_regs[] = { KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */ - ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 0, 2), + + /* + * EL0 Virtual Timer Registers + * + * WARNING: + * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined + * with the appropriate register encodings. Their values have been + * accidentally swapped. As this is set API, the definitions here + * must be used, rather than ones derived from the encodings. + */ + KVM_ARM64_SYS_REG(SYS_CNTV_CTL_EL0), + KVM_REG_ARM_TIMER_CVAL, + KVM_REG_ARM_TIMER_CNT, + ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ -- cgit v1.2.3 From fb10ddf35c1cc3b2888a944c0a3b1aa3baea585e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 24 Sep 2025 16:51:49 -0700 Subject: KVM: arm64: Compute per-vCPU FGTs at vcpu_load() To date KVM has used the fine-grained traps for the sake of UNDEF enforcement (so-called FGUs), meaning the constituent parts could be computed on a per-VM basis and folded into the effective value when programmed. Prepare for traps changing based on the vCPU context by computing the whole mess of them at vcpu_load(). Aggressively inline all the helpers to preserve the build-time checks that were there before. 
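The "aggressively inline" remark refers to a standard kernel trick: a BUILD_BUG_ON(1) in a default: branch only fires if the compiler cannot prove the branch dead, so the helpers must be __always_inline and fed compile-time constants. A userspace model of the mechanism (GCC-style attributes, invented names; build with -O2, as the kernel does):

    #include <stdio.h>

    /*
     * A call to this undefined, error-attributed function must be
     * optimized away; if it survives, compilation fails.
     */
    extern void unhandled_reg(void) __attribute__((error("unhandled reg")));

    enum fgt_reg { REG_A, REG_B };

    static inline __attribute__((always_inline)) int reg_to_group(enum fgt_reg r)
    {
            switch (r) {
            case REG_A:
                    return 0;
            case REG_B:
                    return 1;
            default:
                    unhandled_reg();        /* folded away for constant 'r' */
                    return -1;
            }
    }

    int main(void)
    {
            /*
             * Constant argument: the default branch is eliminated and the
             * build succeeds. A non-constant argument would leave the call
             * in place and turn it into a compile-time error.
             */
            printf("%d\n", reg_to_group(REG_B));
            return 0;
    }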
Signed-off-by: Oliver Upton Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 50 +++++++++++ arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/config.c | 82 ++++++++++++++++++ arch/arm64/kvm/hyp/include/hyp/switch.h | 148 ++++---------------------------- arch/arm64/kvm/hyp/nvhe/pkvm.c | 1 + 5 files changed, 151 insertions(+), 131 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b763293281c8..64302c438355 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -816,6 +816,11 @@ struct kvm_vcpu_arch { u64 hcrx_el2; u64 mdcr_el2; + struct { + u64 r; + u64 w; + } fgt[__NR_FGT_GROUP_IDS__]; + /* Exception Information */ struct kvm_vcpu_fault_info fault; @@ -1600,6 +1605,51 @@ static inline bool kvm_arch_has_irq_bypass(void) void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt); void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1); void check_feature_map(void); +void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu); + +static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg reg) +{ + switch (reg) { + case HFGRTR_EL2: + case HFGWTR_EL2: + return HFGRTR_GROUP; + case HFGITR_EL2: + return HFGITR_GROUP; + case HDFGRTR_EL2: + case HDFGWTR_EL2: + return HDFGRTR_GROUP; + case HAFGRTR_EL2: + return HAFGRTR_GROUP; + case HFGRTR2_EL2: + case HFGWTR2_EL2: + return HFGRTR2_GROUP; + case HFGITR2_EL2: + return HFGITR2_GROUP; + case HDFGRTR2_EL2: + case HDFGWTR2_EL2: + return HDFGRTR2_GROUP; + default: + BUILD_BUG_ON(1); + } +} +#define vcpu_fgt(vcpu, reg) \ + ({ \ + enum fgt_group_id id = __fgt_reg_to_group_id(reg); \ + u64 *p; \ + switch (reg) { \ + case HFGWTR_EL2: \ + case HDFGWTR_EL2: \ + case HFGWTR2_EL2: \ + case HDFGWTR2_EL2: \ + p = &(vcpu)->arch.fgt[id].w; \ + break; \ + default: \ + p = &(vcpu)->arch.fgt[id].r; \ + break; \ + } \ + \ + p; \ + }) #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f01cacb669cf..870953b4a8a7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -642,6 +642,7 @@ nommu: vcpu->arch.hcr_el2 |= HCR_TWI; vcpu_set_pauth_traps(vcpu); + kvm_vcpu_load_fgt(vcpu); if (is_protected_kvm_enabled()) { kvm_call_hyp_nvhe(__pkvm_vcpu_load, diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index fbd8944a3dea..b1cf7660efe1 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -5,6 +5,8 @@ */ #include +#include +#include #include /* @@ -1428,3 +1430,83 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r break; } } + +static __always_inline struct fgt_masks *__fgt_reg_to_masks(enum vcpu_sysreg reg) +{ + switch (reg) { + case HFGRTR_EL2: + return &hfgrtr_masks; + case HFGWTR_EL2: + return &hfgwtr_masks; + case HFGITR_EL2: + return &hfgitr_masks; + case HDFGRTR_EL2: + return &hdfgrtr_masks; + case HDFGWTR_EL2: + return &hdfgwtr_masks; + case HAFGRTR_EL2: + return &hafgrtr_masks; + case HFGRTR2_EL2: + return &hfgrtr2_masks; + case HFGWTR2_EL2: + return &hfgwtr2_masks; + case HFGITR2_EL2: + return &hfgitr2_masks; + case HDFGRTR2_EL2: + return &hdfgrtr2_masks; + case HDFGWTR2_EL2: + return &hdfgwtr2_masks; + default: + BUILD_BUG_ON(1); + } +} + +static __always_inline void __compute_fgt(struct kvm_vcpu *vcpu, enum vcpu_sysreg reg) +{ + u64 fgu = vcpu->kvm->arch.fgu[__fgt_reg_to_group_id(reg)]; + struct fgt_masks *m = __fgt_reg_to_masks(reg); + u64 clear = 0, set = 0, val = m->nmask; + + set |= fgu & m->mask; + 
clear |= fgu & m->nmask; + + if (is_nested_ctxt(vcpu)) { + u64 nested = __vcpu_sys_reg(vcpu, reg); + set |= nested & m->mask; + clear |= ~nested & m->nmask; + } + + val |= set; + val &= ~clear; + *vcpu_fgt(vcpu, reg) = val; +} + +static void __compute_hfgwtr(struct kvm_vcpu *vcpu) +{ + __compute_fgt(vcpu, HFGWTR_EL2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) + *vcpu_fgt(vcpu, HFGWTR_EL2) |= HFGWTR_EL2_TCR_EL1; +} + +void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu) +{ + if (!cpus_have_final_cap(ARM64_HAS_FGT)) + return; + + __compute_fgt(vcpu, HFGRTR_EL2); + __compute_hfgwtr(vcpu); + __compute_fgt(vcpu, HFGITR_EL2); + __compute_fgt(vcpu, HDFGRTR_EL2); + __compute_fgt(vcpu, HDFGWTR_EL2); + __compute_fgt(vcpu, HAFGRTR_EL2); + + if (!cpus_have_final_cap(ARM64_HAS_FGT2)) + return; + + __compute_fgt(vcpu, HFGRTR2_EL2); + __compute_fgt(vcpu, HFGWTR2_EL2); + __compute_fgt(vcpu, HFGITR2_EL2); + __compute_fgt(vcpu, HDFGRTR2_EL2); + __compute_fgt(vcpu, HDFGWTR2_EL2); +} diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index b6682202edf3..c5d5e5b86eaf 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -195,123 +195,6 @@ static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) __deactivate_cptr_traps_nvhe(vcpu); } -#define reg_to_fgt_masks(reg) \ - ({ \ - struct fgt_masks *m; \ - switch(reg) { \ - case HFGRTR_EL2: \ - m = &hfgrtr_masks; \ - break; \ - case HFGWTR_EL2: \ - m = &hfgwtr_masks; \ - break; \ - case HFGITR_EL2: \ - m = &hfgitr_masks; \ - break; \ - case HDFGRTR_EL2: \ - m = &hdfgrtr_masks; \ - break; \ - case HDFGWTR_EL2: \ - m = &hdfgwtr_masks; \ - break; \ - case HAFGRTR_EL2: \ - m = &hafgrtr_masks; \ - break; \ - case HFGRTR2_EL2: \ - m = &hfgrtr2_masks; \ - break; \ - case HFGWTR2_EL2: \ - m = &hfgwtr2_masks; \ - break; \ - case HFGITR2_EL2: \ - m = &hfgitr2_masks; \ - break; \ - case HDFGRTR2_EL2: \ - m = &hdfgrtr2_masks; \ - break; \ - case HDFGWTR2_EL2: \ - m = &hdfgwtr2_masks; \ - break; \ - default: \ - BUILD_BUG_ON(1); \ - } \ - \ - m; \ - }) - -#define compute_clr_set(vcpu, reg, clr, set) \ - do { \ - u64 hfg = __vcpu_sys_reg(vcpu, reg); \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - set |= hfg & m->mask; \ - clr |= ~hfg & m->nmask; \ - } while(0) - -#define reg_to_fgt_group_id(reg) \ - ({ \ - enum fgt_group_id id; \ - switch(reg) { \ - case HFGRTR_EL2: \ - case HFGWTR_EL2: \ - id = HFGRTR_GROUP; \ - break; \ - case HFGITR_EL2: \ - id = HFGITR_GROUP; \ - break; \ - case HDFGRTR_EL2: \ - case HDFGWTR_EL2: \ - id = HDFGRTR_GROUP; \ - break; \ - case HAFGRTR_EL2: \ - id = HAFGRTR_GROUP; \ - break; \ - case HFGRTR2_EL2: \ - case HFGWTR2_EL2: \ - id = HFGRTR2_GROUP; \ - break; \ - case HFGITR2_EL2: \ - id = HFGITR2_GROUP; \ - break; \ - case HDFGRTR2_EL2: \ - case HDFGWTR2_EL2: \ - id = HDFGRTR2_GROUP; \ - break; \ - default: \ - BUILD_BUG_ON(1); \ - } \ - \ - id; \ - }) - -#define compute_undef_clr_set(vcpu, kvm, reg, clr, set) \ - do { \ - u64 hfg = kvm->arch.fgu[reg_to_fgt_group_id(reg)]; \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - set |= hfg & m->mask; \ - clr |= hfg & m->nmask; \ - } while(0) - -#define update_fgt_traps_cs(hctxt, vcpu, kvm, reg, clr, set) \ - do { \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - u64 c = clr, s = set; \ - u64 val; \ - \ - ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ - if (is_nested_ctxt(vcpu)) \ - compute_clr_set(vcpu, reg, c, s); \ - \ - compute_undef_clr_set(vcpu, kvm, reg, c, s); \ - \ - 
val = m->nmask; \ - val |= s; \ - val &= ~c; \ - write_sysreg_s(val, SYS_ ## reg); \ - } while(0) - -#define update_fgt_traps(hctxt, vcpu, kvm, reg) \ - update_fgt_traps_cs(hctxt, vcpu, kvm, reg, 0, 0) - static inline bool cpu_has_amu(void) { u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); @@ -320,33 +203,36 @@ static inline bool cpu_has_amu(void) ID_AA64PFR0_EL1_AMU_SHIFT); } +#define __activate_fgt(hctxt, vcpu, reg) \ + do { \ + ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ + write_sysreg_s(*vcpu_fgt(vcpu, reg), SYS_ ## reg); \ + } while (0) + static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); - struct kvm *kvm = kern_hyp_va(vcpu->kvm); if (!cpus_have_final_cap(ARM64_HAS_FGT)) return; - update_fgt_traps(hctxt, vcpu, kvm, HFGRTR_EL2); - update_fgt_traps_cs(hctxt, vcpu, kvm, HFGWTR_EL2, 0, - cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38) ? - HFGWTR_EL2_TCR_EL1_MASK : 0); - update_fgt_traps(hctxt, vcpu, kvm, HFGITR_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR_EL2); + __activate_fgt(hctxt, vcpu, HFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HFGWTR_EL2); + __activate_fgt(hctxt, vcpu, HFGITR_EL2); + __activate_fgt(hctxt, vcpu, HDFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HDFGWTR_EL2); if (cpu_has_amu()) - update_fgt_traps(hctxt, vcpu, kvm, HAFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HAFGRTR_EL2); if (!cpus_have_final_cap(ARM64_HAS_FGT2)) return; - update_fgt_traps(hctxt, vcpu, kvm, HFGRTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HFGWTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HFGITR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGRTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGWTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGITR2_EL2); + __activate_fgt(hctxt, vcpu, HDFGRTR2_EL2); + __activate_fgt(hctxt, vcpu, HDFGWTR2_EL2); } #define __deactivate_fgt(htcxt, vcpu, reg) \ diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 05774aed09cb..43bde061b65d 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -172,6 +172,7 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) /* Trust the host for non-protected vcpu features. */ vcpu->arch.hcrx_el2 = host_vcpu->arch.hcrx_el2; + memcpy(vcpu->arch.fgt, host_vcpu->arch.fgt, sizeof(vcpu->arch.fgt)); return 0; } -- cgit v1.2.3 From e0b5a7967dec05144bc98125f98c47f74fd1152b Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 24 Sep 2025 16:51:50 -0700 Subject: KVM: arm64: nv: Use FGT write trap of MDSCR_EL1 when available Marc reports that the performance of running an L3 guest has regressed by 60% as a result of setting MDCR_EL2.TDA to hide bad architecture. That's of course terrible for the single user of recursive NV ;-) While there's nothing to be done on non-FGT systems, take advantage of the precise write trap of MDSCR_EL1 and leave the rest of the debug registers untrapped. 
Reported-by: Marc Zyngier Signed-off-by: Oliver Upton Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier --- arch/arm64/kvm/config.c | 10 +++++++++- arch/arm64/kvm/nested.c | 9 ++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index b1cf7660efe1..24bb3f36e9d5 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -1489,6 +1489,14 @@ static void __compute_hfgwtr(struct kvm_vcpu *vcpu) *vcpu_fgt(vcpu, HFGWTR_EL2) |= HFGWTR_EL2_TCR_EL1; } +static void __compute_hdfgwtr(struct kvm_vcpu *vcpu) +{ + __compute_fgt(vcpu, HDFGWTR_EL2); + + if (is_hyp_ctxt(vcpu)) + *vcpu_fgt(vcpu, HDFGWTR_EL2) |= HDFGWTR_EL2_MDSCR_EL1; +} + void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu) { if (!cpus_have_final_cap(ARM64_HAS_FGT)) @@ -1498,7 +1506,7 @@ void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu) __compute_hfgwtr(vcpu); __compute_fgt(vcpu, HFGITR_EL2); __compute_fgt(vcpu, HDFGRTR_EL2); - __compute_fgt(vcpu, HDFGWTR_EL2); + __compute_hdfgwtr(vcpu); __compute_fgt(vcpu, HAFGRTR_EL2); if (!cpus_have_final_cap(ARM64_HAS_FGT2)) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 7a045cad6bdf..f04cda40545b 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1859,13 +1859,16 @@ void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu) { u64 guest_mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2); + if (is_nested_ctxt(vcpu)) + vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE); /* * In yet another example where FEAT_NV2 is fscking broken, accesses * to MDSCR_EL1 are redirected to the VNCR despite having an effect * at EL2. Use a big hammer to apply sanity. + * + * Unless of course we have FEAT_FGT, in which case we can precisely + * trap MDSCR_EL1. */ - if (is_hyp_ctxt(vcpu)) + else if (!cpus_have_final_cap(ARM64_HAS_FGT)) vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; - else - vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE); } -- cgit v1.2.3 From 64d00d41f57bf2228ed6c217c8e263440a4248df Mon Sep 17 00:00:00 2001 From: Arun Abhishek Chowhan Date: Mon, 13 Oct 2025 15:29:14 +0530 Subject: drm/xe: Sort include files alphabetically. Sort the include lines alphabetically; no impact on code behavior.
Signed-off-by: Arun Abhishek Chowhan Reviewed-by: Nitin Gote Link: https://lore.kernel.org/r/20251013095914.3742505-1-arun.abhishek.chowhan@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_guc_ads.c | 2 +- drivers/gpu/drm/xe/xe_tile.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 34f622bef3e0..1c3c9557a9bd 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -27,8 +27,8 @@ #include "xe_sriov_vf.h" #include "xe_step.h" #include "xe_tile_debugfs.h" -#include "xe_wa.h" #include "xe_vsec.h" +#include "xe_wa.h" #ifdef CONFIG_DRM_XE_DEBUG #include "xe_bo_evict.h" diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 58e0b0294a5b..22ac2a8b74c8 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -18,6 +18,7 @@ #include "xe_bo.h" #include "xe_gt.h" #include "xe_gt_ccs_mode.h" +#include "xe_gt_mcr.h" #include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_buf.h" @@ -30,7 +31,6 @@ #include "xe_platform_types.h" #include "xe_uc_fw.h" #include "xe_wa.h" -#include "xe_gt_mcr.h" /* Slack of a few additional entries per engine */ #define ADS_REGSET_EXTRA_MAX 8 diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 6edb5062c1da..8fd6ee990d14 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -19,9 +19,9 @@ #include "xe_tile.h" #include "xe_tile_sysfs.h" #include "xe_ttm_vram_mgr.h" -#include "xe_wa.h" #include "xe_vram.h" #include "xe_vram_types.h" +#include "xe_wa.h" /** * DOC: Multi-tile Design -- cgit v1.2.3 From 3df5aacb9d45fdc2ffaa2ecbddd43ac8fb17b351 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Sat, 11 Oct 2025 18:05:08 +0530 Subject: drm/xe/i2c: Introduce xe_i2c_irq_present() In preparation for wider use cases that require checking for I2C IRQ presence, introduce the xe_i2c_irq_present() helper. Signed-off-by: Raag Jadav Reviewed-by: Andi Shyti Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20251011123509.3233213-2-raag.jadav@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_i2c.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index 48dfcb41fa08..25c6b8f3c0bb 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -160,6 +160,11 @@ bool xe_i2c_present(struct xe_device *xe) return xe->i2c && xe->i2c->ep.cookie == XE_I2C_EP_COOKIE_DEVICE; } +static bool xe_i2c_irq_present(struct xe_device *xe) +{ + return xe->i2c && xe->i2c->adapter_irq; +} + /** * xe_i2c_irq_handler: Handler for I2C interrupts * @xe: xe device instance @@ -170,7 +175,7 @@ bool xe_i2c_present(struct xe_device *xe) */ void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) { - if (!xe->i2c || !xe->i2c->adapter_irq) + if (!xe_i2c_irq_present(xe)) return; if (master_ctl & I2C_IRQ) -- cgit v1.2.3 From 0bb78ce099265fba3808c05de1c75c649664a6cb Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Sat, 11 Oct 2025 18:05:09 +0530 Subject: drm/xe/i2c: Wire up reset/postinstall for I2C IRQ The I2C IRQ needs to be routed to the SGUnit or PUnit on devices that support it. Wire up reset/postinstall handlers for the I2C IRQ to take care of this.
Signed-off-by: Raag Jadav Reviewed-by: Andi Shyti Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20251011123509.3233213-3-raag.jadav@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_i2c_regs.h | 3 +++ drivers/gpu/drm/xe/xe_i2c.c | 21 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_i2c.h | 4 ++++ drivers/gpu/drm/xe/xe_irq.c | 2 ++ 4 files changed, 30 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_i2c_regs.h b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h index af781c8e4a80..f2e455e2bfe4 100644 --- a/drivers/gpu/drm/xe/regs/xe_i2c_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h @@ -14,6 +14,9 @@ #define REG_SG_REMAP_ADDR_PREFIX XE_REG(SOC_BASE + 0x0164) #define REG_SG_REMAP_ADDR_POSTFIX XE_REG(SOC_BASE + 0x0168) +#define I2C_BRIDGE_PCICFGCTL XE_REG(I2C_BRIDGE_OFFSET + 0x200) +#define ACPI_INTR_EN REG_BIT(1) + #define I2C_CONFIG_CMD XE_REG(I2C_CONFIG_SPACE_OFFSET + PCI_COMMAND) #define I2C_CONFIG_PMCSR XE_REG(I2C_CONFIG_SPACE_OFFSET + 0x84) diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index 25c6b8f3c0bb..0b5452be0c87 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -182,6 +182,26 @@ void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) generic_handle_irq_safe(xe->i2c->adapter_irq); } +void xe_i2c_irq_reset(struct xe_device *xe) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe_i2c_irq_present(xe)) + return; + + xe_mmio_rmw32(mmio, I2C_BRIDGE_PCICFGCTL, ACPI_INTR_EN, 0); +} + +void xe_i2c_irq_postinstall(struct xe_device *xe) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe_i2c_irq_present(xe)) + return; + + xe_mmio_rmw32(mmio, I2C_BRIDGE_PCICFGCTL, 0, ACPI_INTR_EN); +} + static int xe_i2c_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw_irq_num) { @@ -339,6 +359,7 @@ int xe_i2c_probe(struct xe_device *xe) if (ret) goto err_remove_irq; + xe_i2c_irq_postinstall(xe); return devm_add_action_or_reset(drm_dev, xe_i2c_remove, i2c); err_remove_irq: diff --git a/drivers/gpu/drm/xe/xe_i2c.h b/drivers/gpu/drm/xe/xe_i2c.h index ecd5f10358e2..425d8160835f 100644 --- a/drivers/gpu/drm/xe/xe_i2c.h +++ b/drivers/gpu/drm/xe/xe_i2c.h @@ -51,12 +51,16 @@ struct xe_i2c { int xe_i2c_probe(struct xe_device *xe); bool xe_i2c_present(struct xe_device *xe); void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl); +void xe_i2c_irq_postinstall(struct xe_device *xe); +void xe_i2c_irq_reset(struct xe_device *xe); void xe_i2c_pm_suspend(struct xe_device *xe); void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold); #else static inline int xe_i2c_probe(struct xe_device *xe) { return 0; } static inline bool xe_i2c_present(struct xe_device *xe) { return false; } static inline void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) { } +static inline void xe_i2c_irq_postinstall(struct xe_device *xe) { } +static inline void xe_i2c_irq_reset(struct xe_device *xe) { } static inline void xe_i2c_pm_suspend(struct xe_device *xe) { } static inline void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) { } #endif diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 870edaf69388..af519414a429 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -616,6 +616,7 @@ static void xe_irq_reset(struct xe_device *xe) tile = xe_device_get_root_tile(xe); mask_and_disable(tile, GU_MISC_IRQ_OFFSET); xe_display_irq_reset(xe); + xe_i2c_irq_reset(xe); /* * The tile's top-level status register should be the last one @@ -657,6 +658,7 @@ static void 
xe_irq_postinstall(struct xe_device *xe) } xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe)); + xe_i2c_irq_postinstall(xe); /* * ASLE backlight operations are reported via GUnit GSE interrupts -- cgit v1.2.3 From 9b42321a02c50a12b2beb6ae9469606257fbecea Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 10 Oct 2025 17:25:29 +0000 Subject: drm/xe/guc: Check GuC running state before deregistering exec queue In normal operation, a registered exec queue is disabled and deregistered through the GuC, and freed only after the GuC confirms completion. However, if the driver is forced to unbind while the exec queue is still running, the user may call exec_destroy() after the GuC has already been stopped and CT communication disabled. In this case, the driver cannot receive a response from the GuC, preventing proper cleanup of exec queue resources. Fix this by directly releasing the resources when GuC is not running. Here is the failure dmesg log: " [ 468.089581] ---[ end trace 0000000000000000 ]--- [ 468.089608] pci 0000:03:00.0: [drm] *ERROR* GT0: GUC ID manager unclean (1/65535) [ 468.090558] pci 0000:03:00.0: [drm] GT0: total 65535 [ 468.090562] pci 0000:03:00.0: [drm] GT0: used 1 [ 468.090564] pci 0000:03:00.0: [drm] GT0: range 1..1 (1) [ 468.092716] ------------[ cut here ]------------ [ 468.092719] WARNING: CPU: 14 PID: 4775 at drivers/gpu/drm/xe/xe_ttm_vram_mgr.c:298 ttm_vram_mgr_fini+0xf8/0x130 [xe] " v2: use xe_uc_fw_is_running() instead of xe_guc_ct_enabled(). As CT may go down and come back during VF migration. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20251010172529.2967639-2-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index e9aa0625ce60..0ef67d3523a7 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -44,6 +44,7 @@ #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_trace.h" +#include "xe_uc_fw.h" #include "xe_vm.h" static struct xe_guc * @@ -1501,7 +1502,17 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); trace_xe_exec_queue_cleanup_entity(q); - if (exec_queue_registered(q)) + /* + * Expected state transitions for cleanup: + * - If the exec queue is registered and GuC firmware is running, we must first + * disable scheduling and deregister the queue to ensure proper teardown and + * resource release in the GuC, then destroy the exec queue on driver side. + * - If the GuC is already stopped (e.g., during driver unload or GPU reset), + * we cannot expect a response for the deregister request. In this case, + * it is safe to directly destroy the exec queue on driver side, as the GuC + * will not process further requests and all resources must be cleaned up locally. 
+ */ + if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) disable_scheduling_deregister(guc, q); else __guc_exec_queue_destroy(guc, q); -- cgit v1.2.3 From 1696b0cfcf004a3af34ffe4c57a14e837ef18144 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Mon, 29 Sep 2025 11:29:04 -0400 Subject: drm/i915/guc: Skip communication warning on reset in progress The GuC IRQ and tasklet handler receive just a single G2H message and let the remaining messages be received by the next tasklet. During this chained tasklet processing, if a reset has started, communication will be disabled. Skip the warning for this condition. Fixes: 65dd4ed0f4e1 ("drm/i915/guc: Don't receive all G2H messages in irq handler") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15018 Signed-off-by: Zhanjun Dong Reviewed-by: Vinay Belgaumkar Signed-off-by: Daniele Ceraolo Spurio Link: https://lore.kernel.org/r/20250929152904.269776-1-zhanjun.dong@intel.com (cherry picked from commit 604b5ee4a653a70979ce689dbd6a5d942eb016bf) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 3e7e5badcc2b..2c651ec024ef 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -1325,9 +1325,16 @@ static int ct_receive(struct intel_guc_ct *ct) static void ct_try_receive_message(struct intel_guc_ct *ct) { + struct intel_guc *guc = ct_to_guc(ct); int ret; - if (GEM_WARN_ON(!ct->enabled)) + if (!ct->enabled) { + GEM_WARN_ON(!guc_to_gt(guc)->uc.reset_in_progress); + return; + } + + /* When interrupts are disabled, message handling is not expected */ + if (!guc->interrupts.enabled) return; ret = ct_receive(ct); -- cgit v1.2.3 From 760039c95c78490c5c66ef584fcd536797ed6a2f Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 3 Oct 2025 17:57:30 +0300 Subject: drm/i915/frontbuffer: Move bo refcounting to intel_frontbuffer_{get,release}() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently xe's intel_frontbuffer implementation forgets to hold a reference on the bo. This makes the entire thing extremely fragile as the cleanup order now depends on bo references held by other things (namely intel_fb_bo_framebuffer_fini()). Move the bo refcounting to intel_frontbuffer_{get,release}() so that both i915 and xe do this the same way. I first tried to fix this by having xe do the refcounting from its intel_bo_set_frontbuffer() implementation (which is what i915 does currently), but it turns out xe's drm_gem_object_free() can sleep and thus drm_gem_object_put() isn't safe to call while we hold fb_tracking.lock.
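The constraint distills to a common kernel pattern, sketched here with generic, made-up names (obj_get()/obj_put() stand in for drm_gem_object_get()/_put(), where the put may sleep on final free):

/* Take the potentially-sleeping reference before the spinlock and only
 * drop it after the lock is released; never call obj_put() under the
 * lock, since the final-free path may sleep. */
static struct tracker *attach_tracker(struct object *obj, struct tracker *t)
{
	struct tracker *cur;

	obj_get(obj);			/* no lock held yet: safe */

	spin_lock(&tracking_lock);
	cur = set_tracker(obj, t);	/* publish t, or return existing */
	spin_unlock(&tracking_lock);

	if (cur != t)
		obj_put(obj);		/* lost the race; lock dropped: safe */

	return cur;
}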
Fixes: 10690b8a49bc ("drm/i915/display: Add intel_fb_bo_framebuffer_fini") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251003145734.7634-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit eb4d490729a5fd8dc5a76d334f8d01fec7c14bbe) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_frontbuffer.c | 10 +++++++++- drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h | 2 -- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 43be5377ddc1..73ed28ac9573 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -270,6 +270,8 @@ static void frontbuffer_release(struct kref *ref) spin_unlock(&display->fb_tracking.lock); i915_active_fini(&front->write); + + drm_gem_object_put(obj); kfree_rcu(front, rcu); } @@ -287,6 +289,8 @@ intel_frontbuffer_get(struct drm_gem_object *obj) if (!front) return NULL; + drm_gem_object_get(obj); + front->obj = obj; kref_init(&front->ref); atomic_set(&front->bits, 0); @@ -299,8 +303,12 @@ intel_frontbuffer_get(struct drm_gem_object *obj) spin_lock(&display->fb_tracking.lock); cur = intel_bo_set_frontbuffer(obj, front); spin_unlock(&display->fb_tracking.lock); - if (cur != front) + + if (cur != front) { + drm_gem_object_put(obj); kfree(front); + } + return cur; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h index b6dc3d1b9bb1..b682969e3a29 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h @@ -89,12 +89,10 @@ i915_gem_object_set_frontbuffer(struct drm_i915_gem_object *obj, if (!front) { RCU_INIT_POINTER(obj->frontbuffer, NULL); - drm_gem_object_put(intel_bo_to_drm_bo(obj)); } else if (rcu_access_pointer(obj->frontbuffer)) { cur = rcu_dereference_protected(obj->frontbuffer, true); kref_get(&cur->ref); } else { - drm_gem_object_get(intel_bo_to_drm_bo(obj)); rcu_assign_pointer(obj->frontbuffer, front); } -- cgit v1.2.3 From 86af6b90e0556fcefbc6e98eb78bdce90327ee76 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 3 Oct 2025 17:57:31 +0300 Subject: drm/i915/fb: Fix the set_tiling vs. addfb race, again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_frontbuffer_get() is what locks out subsequent set_tiling changes to the bo. Thus the fence vs. modifier check must be done after intel_frontbuffer_get(), or else a concurrent set_tiling ioctl might sneak in and change the fence after the check has been done. Close the race again. See commit dd689287b977 ("drm/i915: Prevent concurrent tiling/framebuffer modifications") for the previous instance. 
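The race itself, distilled (generic sketch, not the driver code; freeze(), unfreeze() and validate() are hypothetical stand-ins for intel_frontbuffer_get()/put() and the fence-vs-modifier check):

/* Broken ordering (time flows downward):
 *
 *   addfb thread                        set_tiling thread
 *   ------------                        -----------------
 *   validate(bo): fence vs. modifier OK
 *                                       change the bo's tiling/fence
 *   freeze(bo)  <- too late, the fb now disagrees with the bo
 *
 * Fixed ordering: freeze first, then validate. */
static int fb_init_sketch(struct bo *bo)
{
	struct frontbuffer *front = freeze(bo);	/* locks out set_tiling */

	if (!front)
		return -ENOMEM;

	if (validate(bo)) {			/* now stable vs. set_tiling */
		unfreeze(front);
		return -EINVAL;
	}
	return 0;
}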
v2: Reorder intel_user_framebuffer_destroy() to match the unwind (Jani) Cc: Jouni Högander Reviewed-by: Jani Nikula Fixes: 10690b8a49bc ("drm/i915/display: Add intel_fb_bo_framebuffer_fini") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251003145734.7634-3-ville.syrjala@linux.intel.com (cherry picked from commit 1d1e4ded216017f8febd91332ee337f0e0e79285) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fb.c | 38 +++++++++++++++++---------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 22a4a1575d22..b817ff44c043 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -2113,10 +2113,10 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) if (intel_fb_uses_dpt(fb)) intel_dpt_destroy(intel_fb->dpt_vm); - intel_frontbuffer_put(intel_fb->frontbuffer); - intel_fb_bo_framebuffer_fini(intel_fb_bo(fb)); + intel_frontbuffer_put(intel_fb->frontbuffer); + kfree(intel_fb); } @@ -2218,15 +2218,17 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, int ret = -EINVAL; int i; + /* + * intel_frontbuffer_get() must be done before + * intel_fb_bo_framebuffer_init() to avoid set_tiling vs. addfb race. + */ + intel_fb->frontbuffer = intel_frontbuffer_get(obj); + if (!intel_fb->frontbuffer) + return -ENOMEM; + ret = intel_fb_bo_framebuffer_init(fb, obj, mode_cmd); if (ret) - return ret; - - intel_fb->frontbuffer = intel_frontbuffer_get(obj); - if (!intel_fb->frontbuffer) { - ret = -ENOMEM; - goto err; - } + goto err_frontbuffer_put; ret = -EINVAL; if (!drm_any_plane_has_format(display->drm, @@ -2235,7 +2237,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "unsupported pixel format %p4cc / modifier 0x%llx\n", &mode_cmd->pixel_format, mode_cmd->modifier[0]); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } max_stride = intel_fb_max_stride(display, mode_cmd->pixel_format, @@ -2246,7 +2248,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ? "tiled" : "linear", mode_cmd->pitches[0], max_stride); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } /* FIXME need to adjust LINOFF/TILEOFF accordingly. 
*/ @@ -2254,7 +2256,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "plane 0 offset (0x%08x) must be 0\n", mode_cmd->offsets[0]); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } drm_helper_mode_fill_fb_struct(display->drm, fb, info, mode_cmd); @@ -2264,7 +2266,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (mode_cmd->handles[i] != mode_cmd->handles[0]) { drm_dbg_kms(display->drm, "bad plane %d handle\n", i); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } stride_alignment = intel_fb_stride_alignment(fb, i); @@ -2272,7 +2274,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "plane %d pitch (%d) must be at least %u byte aligned\n", i, fb->pitches[i], stride_alignment); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } if (intel_fb_is_gen12_ccs_aux_plane(fb, i)) { @@ -2282,7 +2284,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "ccs aux plane %d pitch (%d) must be %d\n", i, fb->pitches[i], ccs_aux_stride); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } } @@ -2291,7 +2293,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, ret = intel_fill_fb_info(display, intel_fb); if (ret) - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; if (intel_fb_uses_dpt(fb)) { struct i915_address_space *vm; @@ -2317,10 +2319,10 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, err_free_dpt: if (intel_fb_uses_dpt(fb)) intel_dpt_destroy(intel_fb->dpt_vm); +err_bo_framebuffer_fini: + intel_fb_bo_framebuffer_fini(obj); err_frontbuffer_put: intel_frontbuffer_put(intel_fb->frontbuffer); -err: - intel_fb_bo_framebuffer_fini(obj); return ret; } -- cgit v1.2.3 From 9858ea4c29c283f0a8a3cdbb42108d464ece90a8 Mon Sep 17 00:00:00 2001 From: Matthew Schwartz Date: Thu, 9 Oct 2025 14:19:00 +0200 Subject: Revert "drm/amd/display: Only restore backlight after amdgpu_dm_init or dm_resume" This fix regressed the original issue that commit 7875afafba84 ("drm/amd/display: Fix brightness level not retained over reboot") solved, so revert it until a different approach to solve the regression that it caused with AMD_PRIVATE_COLOR is found. 
Fixes: a490c8d77d50 ("drm/amd/display: Only restore backlight after amdgpu_dm_init or dm_resume") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4620 Cc: stable@vger.kernel.org Signed-off-by: Matthew Schwartz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++++-------- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 7 ------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0d03e324d5b9..6597475e245d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2085,8 +2085,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) dc_hardware_init(adev->dm.dc); - adev->dm.restore_backlight = true; - adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev); if (!adev->dm.hpd_rx_offload_wq) { drm_err(adev_to_drm(adev), "failed to create hpd rx offload workqueue.\n"); @@ -3442,7 +3440,6 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); dc_resume(dm->dc); - adev->dm.restore_backlight = true; amdgpu_dm_irq_resume_early(adev); @@ -9969,6 +9966,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, bool mode_set_reset_required = false; u32 i; struct dc_commit_streams_params params = {dc_state->streams, dc_state->stream_count}; + bool set_backlight_level = false; /* Disable writeback */ for_each_old_connector_in_state(state, connector, old_con_state, i) { @@ -10088,6 +10086,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, acrtc->hw_mode = new_crtc_state->mode; crtc->hwmode = new_crtc_state->mode; mode_set_reset_required = true; + set_backlight_level = true; } else if (modereset_required(new_crtc_state)) { drm_dbg_atomic(dev, "Atomic commit: RESET. crtc id %d:[%p]\n", @@ -10144,16 +10143,13 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, * to fix a flicker issue. * It will cause the dm->actual_brightness is not the current panel brightness * level. (the dm->brightness is the correct panel level) - * So we set the backlight level with dm->brightness value after initial - * set mode. Use restore_backlight flag to avoid setting backlight level - * for every subsequent mode set. + * So we set the backlight level with dm->brightness value after set mode */ - if (dm->restore_backlight) { + if (set_backlight_level) { for (i = 0; i < dm->num_of_edps; i++) { if (dm->backlight_dev[i]) amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); } - dm->restore_backlight = false; } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 009f206226f0..db75e991ac7b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -630,13 +630,6 @@ struct amdgpu_display_manager { */ u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; - /** - * @restore_backlight: - * - * Flag to indicate whether to restore backlight after modeset. 
- */ - bool restore_backlight; - /** * @aux_hpd_discon_quirk: * -- cgit v1.2.3 From 5c05bcf6ae7732da1bd4dc1958d527b5f07f216a Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Fri, 26 Sep 2025 20:26:12 +0200 Subject: drm/amd/pm: Disable MCLK switching on SI at high pixel clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On various SI GPUs, flickering can be observed near the bottom edge of the screen when using a single 4K 60Hz monitor over DP. Disabling MCLK switching works around this problem. Reviewed-by: Alex Deucher Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index cf9932e68055..3a9522c17fee 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -3500,6 +3500,11 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, * for these GPUs to calculate bandwidth requirements. */ if (high_pixelclock_count) { + /* Work around flickering lines at the bottom edge + * of the screen when using a single 4K 60Hz monitor. + */ + disable_mclk_switching = true; + /* On Oland, we observe some flickering when two 4K 60Hz * displays are connected, possibly because voltage is too low. * Raise the voltage by requiring a higher SCLK. -- cgit v1.2.3 From 7bdd91abf0cb3ea78160e2e78fb58b12f6a38d55 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Fri, 26 Sep 2025 20:26:13 +0200 Subject: drm/amd: Disable ASPM on SI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enabling ASPM causes random hangs on Tahiti and Oland on Zen4. It's unclear if this is a platform-specific or GPU-specific issue. Disable ASPM on SI for the time being. Reviewed-by: Alex Deucher Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7a899fb4de29..3d032c4e2dce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1882,6 +1882,13 @@ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev) { + /* Enabling ASPM causes random hangs on Tahiti and Oland on Zen4. + * It's unclear if this is a platform-specific or GPU-specific issue. + * Disable ASPM on SI for the time being. + */ + if (adev->family == AMDGPU_FAMILY_SI) + return true; + #if IS_ENABLED(CONFIG_X86) struct cpuinfo_x86 *c = &cpu_data(0); -- cgit v1.2.3 From 5d55ed19d4190d2c210ac05ac7a53f800a8c6fe5 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 27 Aug 2025 14:47:23 +0200 Subject: drm/amdgpu: remove two invalid BUG_ON()s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those can be triggered trivially by userspace.
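The rule being applied: never BUG_ON() a condition that userspace controls. A generic sketch of the preferred shape (hypothetical names; the next patch in this series does exactly this for AMDGPU_IB_FLAG_CE at CS-parse time):

/* Reject a userspace-supplied flag at the submission boundary with
 * -EINVAL instead of asserting deep in the ring-emit path, where a
 * BUG_ON() would take down the whole kernel. */
static int parse_ib_sketch(struct submission *sub, u32 flags)
{
	if (flags & UNSUPPORTED_FLAG)	/* userspace-controlled input */
		return -EINVAL;		/* fail the ioctl, don't panic */

	emit_ib(sub, flags);		/* emit path may now assume validity */
	return 0;
}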
Signed-off-by: Christian König Reviewed-by: Alex Deucher Acked-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 66c47c466532..d61eb9f187c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5862,8 +5862,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; - BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); - header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); control |= ib->length_dw | (vmid << 24); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 710ec9c34e43..93fde0f9af87 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -4419,8 +4419,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; - BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); - header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); control |= ib->length_dw | (vmid << 24); -- cgit v1.2.3 From 8f74c70be57527d7b79e2ecf6de1a154d148254d Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 22 Sep 2025 14:18:16 +0200 Subject: drm/amdgpu: block CE CS if not explicitly allowed by module option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Constant Engine found on gfx6-gfx10 HW has been a notorious source of problems. RADV never used it in the first place, and radeonsi only used it for a few releases around 2017 for gfx6-gfx9 before dropping support for it as well. While investigating another problem, I recently found that submitting to the CE has seemingly been completely broken on gfx9 for quite a while. Since nobody complained about that problem, it most likely means that nobody is using any of the affected radeonsi versions on current Linux kernels any more. So, to potentially phase out support for the CE and eliminate another source of problems, block submitting CE IBs unless it is explicitly enabled again using a debug flag.
Signed-off-by: Christian König Reviewed-by: Alex Deucher Acked-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 +++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2a0df4cabb99..6f5b4a0e0a34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1290,6 +1290,7 @@ struct amdgpu_device { bool debug_disable_gpu_ring_reset; bool debug_vm_userptr; bool debug_disable_ce_logs; + bool debug_enable_ce_cs; /* Protection for the following isolation structure */ struct mutex enforce_isolation_mutex; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9cd7741d2254..ba9fb08db094 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -364,6 +364,12 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, if (p->uf_bo && ring->funcs->no_user_fence) return -EINVAL; + if (!p->adev->debug_enable_ce_cs && + chunk_ib->flags & AMDGPU_IB_FLAG_CE) { + dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n"); + return -EINVAL; + } + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index bff25ef3e2d0..61268aa82df4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -144,7 +144,8 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), AMDGPU_DEBUG_SMU_POOL = BIT(7), AMDGPU_DEBUG_VM_USERPTR = BIT(8), - AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9) + AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9), + AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10) }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -2289,6 +2290,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: disable kernel logs of correctable errors\n"); adev->debug_disable_ce_logs = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) { + pr_info("debug: allowing command submission to CE engine\n"); + adev->debug_enable_ce_cs = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) -- cgit v1.2.3 From 357d90be2c7aaa526a840cddffd2b8d676fe75a6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 26 Sep 2025 17:31:32 -0400 Subject: drm/amdgpu: fix handling of harvesting for ip_discovery firmware Chips which use the IP discovery firmware loaded by the driver reported incorrect harvesting information in the ip discovery table in sysfs, because the driver only uses the ip discovery firmware for populating sysfs and not for direct parsing by the driver itself. As such, the fields that are used to print the harvesting info in sysfs report incorrect data for some IPs. Populate the relevant fields for this case as well.
Fixes: 514678da56da ("drm/amdgpu/discovery: fix fw based ip discovery") Acked-by: Tom St Denis Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 73401f0aeb34..dd7b2b796427 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1033,7 +1033,9 @@ static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev, /* Until a uniform way is figured, get mask based on hwid */ switch (hw_id) { case VCN_HWID: - harvest = ((1 << inst) & adev->vcn.inst_mask) == 0; + /* VCN vs UVD+VCE */ + if (!amdgpu_ip_version(adev, VCE_HWIP, 0)) + harvest = ((1 << inst) & adev->vcn.inst_mask) == 0; break; case DMU_HWID: if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK) @@ -2565,7 +2567,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; adev->gmc.num_umc = 4; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 0, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 0, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 0); @@ -2592,7 +2596,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; adev->gmc.num_umc = 4; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 3, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 3, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 1); @@ -2619,8 +2625,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 1; + adev->sdma.sdma_mask = 1; adev->vcn.num_vcn_inst = 1; adev->gmc.num_umc = 2; + adev->gfx.xcc_mask = 1; if (adev->apu_flags & AMD_APU_IS_RAVEN2) { adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 2, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 2, 0); @@ -2665,7 +2673,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); vega20_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; adev->gmc.num_umc = 8; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 0); @@ -2693,8 +2703,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); arct_reg_base_init(adev); adev->sdma.num_instances = 8; + adev->sdma.sdma_mask = 0xff; adev->vcn.num_vcn_inst = 2; adev->gmc.num_umc = 8; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 1); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 1); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 1); @@ -2726,8 +2738,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); aldebaran_reg_base_init(adev); adev->sdma.num_instances = 5; + adev->sdma.sdma_mask = 0x1f; adev->vcn.num_vcn_inst = 2; adev->gmc.num_umc = 4; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 2); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 2); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 4, 0); @@ 
-2762,6 +2776,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) } else { cyan_skillfish_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(2, 0, 3); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(2, 0, 3); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(5, 0, 1); -- cgit v1.2.3 From 1f22fcb88bfef26a966e9eb242c692c6bf253d47 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 15 Sep 2025 12:37:32 -0400 Subject: drm/amdgpu: handle wrap around in reemit handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compare the sequence numbers directly. Fixes: 77cc0da39c7c ("drm/amdgpu: track ring state associated with a fence") Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index fd8cca241da6..e270df30c279 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -790,14 +790,19 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, struct dma_fence *unprocessed; struct dma_fence __rcu **ptr; struct amdgpu_fence *fence; - u64 wptr, i, seqno; + u64 wptr; + u32 seq, last_seq; - seqno = amdgpu_fence_read(ring); + last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; + seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; wptr = ring->fence_drv.signalled_wptr; ring->ring_backup_entries_to_copy = 0; - for (i = seqno + 1; i <= ring->fence_drv.sync_seq; ++i) { - ptr = &ring->fence_drv.fences[i & ring->fence_drv.num_fences_mask]; + do { + last_seq++; + last_seq &= ring->fence_drv.num_fences_mask; + + ptr = &ring->fence_drv.fences[last_seq]; rcu_read_lock(); unprocessed = rcu_dereference(*ptr); @@ -813,7 +818,7 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, wptr = fence->wptr; } rcu_read_unlock(); - } + } while (last_seq != seq); } /* -- cgit v1.2.3 From ff780f4f80323148d43198f2052c14160c8428d3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 3 Sep 2025 13:48:23 -0400 Subject: drm/amdgpu: set an error on all fences from a bad context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we backup ring contents to reemit after a queue reset, we don't backup ring contents from the bad context. When we signal the fences, we should set an error on those fences as well. 
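Both this fix and the preceding wrap-around fix walk the fence ring with a masked do/while rather than a plain for loop; a small self-contained sketch (8 slots, made-up numbers) shows why. With u32 sequence numbers, for (i = seqno + 1; i <= sync_seq; ++i) runs zero times once sync_seq has wrapped below seqno, silently skipping every pending fence:

#include <stdio.h>

#define NUM_FENCES_MASK 0x7u	/* 8 fence slots, power of two */

/* Visit every slot between the last-signalled and last-emitted sequence
 * numbers, independent of 32-bit wrap-around. Assumes at least one
 * unprocessed fence, as in the kernel code. */
static void visit_unprocessed(unsigned int signalled, unsigned int emitted)
{
	unsigned int last_seq = signalled & NUM_FENCES_MASK;
	unsigned int seq = emitted & NUM_FENCES_MASK;

	do {
		last_seq = (last_seq + 1) & NUM_FENCES_MASK;
		printf("slot %u\n", last_seq);
	} while (last_seq != seq);
}

int main(void)
{
	/* emitted (0x1) has wrapped below signalled (0xfffffffe): the naive
	 * loop's condition is false from the start; this visits 3 slots. */
	visit_unprocessed(0xfffffffeu, 0x1u);
	return 0;
}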
v2: misc cleanups v3: add locking for fence error, fix comment (Christian) v4: fix wrap around, locking (Christian) Fixes: 77cc0da39c7c ("drm/amdgpu: track ring state associated with a fence") Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 39 +++++++++++++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +- 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index e270df30c279..18a7829122d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -758,11 +758,42 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) * @fence: fence of the ring to signal * */ -void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence) +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) { - dma_fence_set_error(&fence->base, -ETIME); - amdgpu_fence_write(fence->ring, fence->seq); - amdgpu_fence_process(fence->ring); + struct dma_fence *unprocessed; + struct dma_fence __rcu **ptr; + struct amdgpu_fence *fence; + struct amdgpu_ring *ring = af->ring; + unsigned long flags; + u32 seq, last_seq; + + last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; + seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; + + /* mark all fences from the guilty context with an error */ + spin_lock_irqsave(&ring->fence_drv.lock, flags); + do { + last_seq++; + last_seq &= ring->fence_drv.num_fences_mask; + + ptr = &ring->fence_drv.fences[last_seq]; + rcu_read_lock(); + unprocessed = rcu_dereference(*ptr); + + if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) { + fence = container_of(unprocessed, struct amdgpu_fence, base); + + if (fence == af) + dma_fence_set_error(&fence->base, -ETIME); + else if (fence->context == af->context) + dma_fence_set_error(&fence->base, -ECANCELED); + } + rcu_read_unlock(); + } while (last_seq != seq); + spin_unlock_irqrestore(&ring->fence_drv.lock, flags); + /* signal the guilty fence */ + amdgpu_fence_write(ring, af->seq); + amdgpu_fence_process(ring); } void amdgpu_fence_save_wptr(struct dma_fence *fence) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 8f6ce948c684..5ec5c3ff22bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -811,7 +811,7 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, if (r) return r; - /* signal the fence of the bad job */ + /* signal the guilty fence and set an error on all fences from the context */ if (guilty_fence) amdgpu_fence_driver_guilty_force_completion(guilty_fence); /* Re-emit the non-guilty commands */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index b6b649179776..4b46e3c26ff3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -155,7 +155,7 @@ extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); -void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence); +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af); void 
amdgpu_fence_save_wptr(struct dma_fence *fence); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); -- cgit v1.2.3 From 6df8e84aa6b5b1812cc2cacd6b3f5ccbb18cda2b Mon Sep 17 00:00:00 2001 From: Gui-Dong Han Date: Wed, 8 Oct 2025 03:43:27 +0000 Subject: drm/amdgpu: use atomic functions with memory barriers for vm fault info The atomic variable vm_fault_info_updated is used to synchronize access to adev->gmc.vm_fault_info between the interrupt handler and get_vm_fault_info(). The default atomic functions like atomic_set() and atomic_read() do not provide memory barriers. This allows for CPU instruction reordering, meaning the memory accesses to vm_fault_info and the vm_fault_info_updated flag are not guaranteed to occur in the intended order. This creates a race condition that can lead to inconsistent or stale data being used. The previous implementation, which used an explicit mb(), was incomplete and inefficient. It failed to account for all potential CPU reorderings, such as the access of vm_fault_info being reordered before the atomic_read of the flag. This approach is also more verbose and less performant than using the proper atomic functions with acquire/release semantics. Fix this by switching to atomic_set_release() and atomic_read_acquire(). These functions provide the necessary acquire and release semantics, which act as memory barriers to ensure the correct order of operations. It is also more efficient and idiomatic than using explicit full memory barriers. Fixes: b97dfa27ef3a ("drm/amdgpu: save vm fault information for amdkfd") Cc: stable@vger.kernel.org Signed-off-by: Gui-Dong Han Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 ++--- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 7 +++---- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 83020963dfde..a2ca9acf8c4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2329,10 +2329,9 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem) int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, struct kfd_vm_fault_info *mem) { - if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { + if (atomic_read_acquire(&adev->gmc.vm_fault_info_updated) == 1) { *mem = *adev->gmc.vm_fault_info; - mb(); /* make sure read happened */ - atomic_set(&adev->gmc.vm_fault_info_updated, 0); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 0); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 93d7ccb7d013..0e5e54d0a9a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1068,7 +1068,7 @@ static int gmc_v7_0_sw_init(struct amdgpu_ip_block *ip_block) GFP_KERNEL); if (!adev->gmc.vm_fault_info) return -ENOMEM; - atomic_set(&adev->gmc.vm_fault_info_updated, 0); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 0); return 0; } @@ -1290,7 +1290,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) - && !atomic_read(&adev->gmc.vm_fault_info_updated)) { + && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) { struct kfd_vm_fault_info *info = 
adev->gmc.vm_fault_info; u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, @@ -1306,8 +1306,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, info->prot_read = protections & 0x8 ? true : false; info->prot_write = protections & 0x10 ? true : false; info->prot_exec = protections & 0x20 ? true : false; - mb(); - atomic_set(&adev->gmc.vm_fault_info_updated, 1); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 1); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index c5e2a2c41e06..e1509480dfc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1183,7 +1183,7 @@ static int gmc_v8_0_sw_init(struct amdgpu_ip_block *ip_block) GFP_KERNEL); if (!adev->gmc.vm_fault_info) return -ENOMEM; - atomic_set(&adev->gmc.vm_fault_info_updated, 0); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 0); return 0; } @@ -1478,7 +1478,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) - && !atomic_read(&adev->gmc.vm_fault_info_updated)) { + && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) { struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, @@ -1494,8 +1494,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, info->prot_read = protections & 0x8 ? true : false; info->prot_write = protections & 0x10 ? true : false; info->prot_exec = protections & 0x20 ? true : false; - mb(); - atomic_set(&adev->gmc.vm_fault_info_updated, 1); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 1); } return 0; -- cgit v1.2.3 From ef38b4eab146715bc68d45029257f5e69ea3f2cd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 10 Oct 2025 16:40:57 -0400 Subject: drm/amdgpu: drop unused structures in amdgpu_drm.h These were never used and are duplicated with the interface that is used. Maybe leftovers from a previous revision of the patch that added them. 
Fixes: 90c448fef312 ("drm/amdgpu: add new AMDGPU_INFO subquery for userq objects") Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index cd7402e36b6d..406a42be429b 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1555,27 +1555,6 @@ struct drm_amdgpu_info_hw_ip { __u32 userq_num_slots; }; -/* GFX metadata BO sizes and alignment info (in bytes) */ -struct drm_amdgpu_info_uq_fw_areas_gfx { - /* shadow area size */ - __u32 shadow_size; - /* shadow area base virtual mem alignment */ - __u32 shadow_alignment; - /* context save area size */ - __u32 csa_size; - /* context save area base virtual mem alignment */ - __u32 csa_alignment; -}; - -/* IP specific fw related information used in the - * subquery AMDGPU_INFO_UQ_FW_AREAS - */ -struct drm_amdgpu_info_uq_fw_areas { - union { - struct drm_amdgpu_info_uq_fw_areas_gfx gfx; - }; -}; - struct drm_amdgpu_info_num_handles { /** Max handles as supported by firmware for UVD */ __u32 uvd_max_handles; -- cgit v1.2.3 From 6917112af2ba36c5f19075eb9f2933ffd07e55bf Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Mon, 13 Oct 2025 08:06:42 +0200 Subject: drm/amd/powerplay: Fix CIK shutdown temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove extra multiplication. CIK GPUs such as Hawaii appear to use PP_TABLE_V0 in which case the shutdown temperature is hardcoded in smu7_init_dpm_defaults and is already multiplied by 1000. The value was mistakenly multiplied another time by smu7_get_thermal_temperature_range. Fixes: 4ba082572a42 ("drm/amd/powerplay: export the thermal ranges of VI asics (V2)") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/1676 Reviewed-by: Alex Deucher Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index 8da882c51856..9b28c0728269 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -5444,8 +5444,7 @@ static int smu7_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->max = table_info->cac_dtp_table->usSoftwareShutdownTemp * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; else if (hwmgr->pp_table_version == PP_TABLE_V0) - thermal_data->max = data->thermal_temp_setting.temperature_shutdown * - PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->max = data->thermal_temp_setting.temperature_shutdown; thermal_data->sw_ctf_threshold = thermal_data->max; -- cgit v1.2.3 From 74de0eaa00eac2e0cbad1dda6dcf8f44ab27629e Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Fri, 10 Oct 2025 23:32:40 +0530 Subject: drm/amdgpu: fix bit shift logic BIT_ULL(n) sets nth bit, remove explicit shift and set the position Fixes: a7a411e24626 ("drm/amdgpu: fix shift-out-of-bounds in amdgpu_debugfs_jpeg_sched_mask_set") Signed-off-by: Sathishkumar S Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 6b7d66b6d4cc..63ee6ba6a931 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -371,7 +371,7 @@ static int amdgpu_debugfs_jpeg_sched_mask_set(void *data, u64 val) for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; - if (val & (BIT_ULL(1) << ((i * adev->jpeg.num_jpeg_rings) + j))) + if (val & (BIT_ULL((i * adev->jpeg.num_jpeg_rings) + j))) ring->sched.ready = true; else ring->sched.ready = false; -- cgit v1.2.3 From 33cc891b56b93cad1a83263eaf2e417436f70c82 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 7 Oct 2025 10:10:52 +0200 Subject: drm/amdgpu: hide VRAM sysfs attributes on GPUs without VRAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise accessing them can cause a crash. Signed-off-by: Christian König Tested-by: Mangesh Gadre Acked-by: Alex Deucher Reviewed-by: Arunpravin Paneer Selvam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index a5adb2ed9b3c..9d934c07fa6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -234,6 +234,9 @@ static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj, !adev->gmc.vram_vendor) return 0; + if (!ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) + return 0; + return attr->mode; } -- cgit v1.2.3 From 883f309add55060233bf11c1ea6947140372920f Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Mon, 13 Oct 2025 13:46:12 +0800 Subject: drm/amdgpu: Fix NULL pointer dereference in VRAM logic for APU devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, APU platforms (and other scenarios with uninitialized VRAM managers) triggered a NULL pointer dereference in `ttm_resource_manager_usage()`. The root cause is not that the `struct ttm_resource_manager *man` pointer itself is NULL, but that `man->bdev` (the backing device pointer within the manager) remains uninitialized (NULL) on APUs—since APUs lack dedicated VRAM and do not fully set up VRAM manager structures. When `ttm_resource_manager_usage()` attempts to acquire `man->bdev->lru_lock`, it dereferences the NULL `man->bdev`, leading to a kernel OOPS. 1. **amdgpu_cs.c**: Extend the existing bandwidth control check in `amdgpu_cs_get_threshold_for_moves()` to include a check for `ttm_resource_manager_used()`. If the manager is not used (uninitialized `bdev`), return 0 for migration thresholds immediately—skipping VRAM-specific logic that would trigger the NULL dereference. 2. **amdgpu_kms.c**: Update the `AMDGPU_INFO_VRAM_USAGE` ioctl and memory info reporting to use a conditional: if the manager is used, return the real VRAM usage; otherwise, return 0. This avoids accessing `man->bdev` when it is NULL. 3. **amdgpu_virt.c**: Modify the vf2pf (virtual function to physical function) data write path. Use `ttm_resource_manager_used()` to check validity: if the manager is usable, calculate `fb_usage` from VRAM usage; otherwise, set `fb_usage` to 0 (APUs have no discrete framebuffer to report). 
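As a concrete illustration of the guard pattern these three call sites now share, here is a minimal sketch (amdgpu_report_vram_usage() is a hypothetical wrapper invented for this note, not a helper added by the patch):

	/* Fall back to 0 instead of touching man->bdev when the VRAM
	 * manager was never initialized (e.g. APUs without dedicated VRAM).
	 */
	static u64 amdgpu_report_vram_usage(struct amdgpu_device *adev)
	{
		struct ttm_resource_manager *man = &adev->mman.vram_mgr.manager;

		return ttm_resource_manager_used(man) ?
			ttm_resource_manager_usage(man) : 0;
	}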
This approach is more robust than APU-specific checks because it: - Works for all scenarios where the VRAM manager is uninitialized (not just APUs), - Aligns with TTM's design by using its native helper function, - Preserves correct behavior for discrete GPUs (which have fully initialized `man->bdev` and pass the `ttm_resource_manager_used()` check). v4: use ttm_resource_manager_used(&adev->mman.vram_mgr.manager) instead of checking the adev->gmc.is_app_apu flag (Christian) Reviewed-by: Christian König Suggested-by: Lijo Lazar Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 7 ++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ba9fb08db094..2f6a96af7fb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -708,7 +708,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, */ const s64 us_upper_bound = 200000; - if (!adev->mm_stats.log2_max_MBps) { + if ((!adev->mm_stats.log2_max_MBps) || !ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) { *max_bytes = 0; *max_vis_bytes = 0; return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index a9327472c651..b3e6b3fcdf2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -758,7 +758,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: - ui64 = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager); + ui64 = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? + ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) : 0; return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VIS_VRAM_USAGE: ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr); @@ -804,8 +805,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) mem.vram.usable_heap_size = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size) - AMDGPU_VM_RESERVED_VRAM; - mem.vram.heap_usage = - ttm_resource_manager_usage(vram_man); + mem.vram.heap_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? + ttm_resource_manager_usage(vram_man) : 0; mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 3328ab63376b..f96beb96c75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -598,8 +598,8 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) vf2pf_info->driver_cert = 0; vf2pf_info->os_info.all = 0; - vf2pf_info->fb_usage = - ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20; + vf2pf_info->fb_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? 
+		ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20 : 0;
 	vf2pf_info->fb_vis_usage =
 		amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20;
 	vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20;
-- cgit v1.2.3

From d0de79f66a80eeb849033fae34bd07a69ce72235 Mon Sep 17 00:00:00 2001
From: Jonathan Kim
Date: Thu, 9 Oct 2025 10:45:42 -0400
Subject: drm/amdgpu: fix gfx12 mes packet status return check

GFX12 MES uses the low 32 bits of the status return for success (1 or 0)
and the high bits for debug information if the low bits are 0. GFX11 MES
doesn't do this, so checking the full 64-bit status return for 1 or 0 is
still valid there.

Signed-off-by: Jonathan Kim
Reviewed-by: Alex Deucher
Signed-off-by: Alex Deucher
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index aff06f06aeee..e3149196143e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -228,7 +228,12 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 			pipe, x_pkt->header.opcode);
 
 	r = amdgpu_fence_wait_polling(ring, seq, timeout);
-	if (r < 1 || !*status_ptr) {
+
+	/*
+	 * status_ptr[31:0] == 0 (fail) or status_ptr[63:0] == 1 (success).
+	 * If status_ptr[31:0] == 0 then status_ptr[63:32] will have debug error information.
+	 */
+	if (r < 1 || !(lower_32_bits(*status_ptr))) {
 		if (misc_op_str)
 			dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n",
-- cgit v1.2.3

From 8745ca5efb2aad0b6591d9b8cd48573ea49c929d Mon Sep 17 00:00:00 2001
From: Jonathan Kim
Date: Thu, 9 Oct 2025 10:48:09 -0400
Subject: drm/amdgpu: fix initialization of doorbell array for detect and hang

Initialized doorbells should be set to invalid rather than 0 to prevent the
driver from over-counting hung doorbells, since it checks against the
invalid value to begin with.

Signed-off-by: Jonathan Kim
Reviewed-by: Alex Deucher
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 5bf9be073cdd..30e1fb510600 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -409,7 +409,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
 		return -EINVAL;
 
 	/* Clear the doorbell array before detection */
-	memset(adev->mes.hung_queue_db_array_cpu_addr, 0,
+	memset(adev->mes.hung_queue_db_array_cpu_addr, AMDGPU_MES_INVALID_DB_OFFSET,
 	       adev->mes.hung_queue_db_array_size * sizeof(u32));
 
 	input.queue_type = queue_type;
 	input.detect_only = detect_only;
-- cgit v1.2.3

From 0ef930e1faca6418316e5b9a3b4d1f6ae9e5b240 Mon Sep 17 00:00:00 2001
From: Jonathan Kim
Date: Thu, 9 Oct 2025 11:28:19 -0400
Subject: drm/amdgpu: fix hung reset queue array memory allocation

By design the MES will return an array result that is twice the number of
hung doorbells it can report, i.e. if up to k reported doorbells are
supported, then the second half of the array, also of length k, holds the
HQD information (type/queue/pipe), where queue 1 corresponds to index 0 and
k, queue 2 corresponds to index 1 and k + 1, etc.

The driver will use the HQD info to target queue/pipe reset for hardware
scheduled user compute queues.
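To make the array layout concrete, a short sketch of how a consumer would index the doubled array (k and the local variable names here are illustrative, not taken from the code):

	/* db_array[0..k-1]:  doorbell offsets of hung queues
	 * db_array[k..2k-1]: HQD info (type/queue/pipe), so entries i and
	 *                    k + i describe the same queue
	 */
	u32 doorbell = db_array[i];
	u32 hqd_info = db_array[k + i];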
Signed-off-by: Jonathan Kim Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 7 ++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 + drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 8 +++++--- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 8 +++++--- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 30e1fb510600..94973018f761 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -420,12 +420,17 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, dev_err(adev->dev, "failed to detect and reset\n"); } else { *hung_db_num = 0; - for (i = 0; i < adev->mes.hung_queue_db_array_size; i++) { + for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) { if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) { hung_db_array[i] = db_array[i]; *hung_db_num += 1; } } + + /* + * TODO: return HQD info for MES scheduled user compute queue reset cases + * stored in hung_db_array hqd info offset to full array size + */ } return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 6b506fc72f58..97c137c90f97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -149,6 +149,7 @@ struct amdgpu_mes { void *resource_1_addr[AMDGPU_MAX_MES_PIPES]; int hung_queue_db_array_size; + int hung_queue_hqd_info_offset; struct amdgpu_bo *hung_queue_db_array_gpu_obj; uint64_t hung_queue_db_array_gpu_addr; void *hung_queue_db_array_cpu_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 2db9b2c63693..1cd9eaeef38f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -208,10 +208,10 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev, struct amdgpu_userq_mgr *uqm, *tmp; unsigned int hung_db_num = 0; int queue_id, r, i; - u32 db_array[4]; + u32 db_array[8]; - if (db_array_size > 4) { - dev_err(adev->dev, "DB array size (%d vs 4) too small\n", + if (db_array_size > 8) { + dev_err(adev->dev, "DB array size (%d vs 8) too small\n", db_array_size); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index e82188431f79..da575bb1377f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -66,7 +66,8 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); #define GFX_MES_DRAM_SIZE 0x80000 #define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE) -#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 4 +#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset, [4:7] = hqd info */ +#define MES11_HUNG_HQD_INFO_OFFSET 4 static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring) { @@ -1720,8 +1721,9 @@ static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int pipe, r; - adev->mes.hung_queue_db_array_size = - MES11_HUNG_DB_OFFSET_ARRAY_SIZE; + adev->mes.hung_queue_db_array_size = MES11_HUNG_DB_OFFSET_ARRAY_SIZE; + adev->mes.hung_queue_hqd_info_offset = MES11_HUNG_HQD_INFO_OFFSET; + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 
e3149196143e..7f3512d9de07 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -47,7 +47,8 @@ static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev);
 
 #define MES_EOP_SIZE   2048
 
-#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 4
+#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset [4:7] hqd info */
+#define MES12_HUNG_HQD_INFO_OFFSET      4
 
 static void mes_v12_0_ring_set_wptr(struct amdgpu_ring *ring)
 {
@@ -1904,8 +1905,9 @@ static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block)
 	struct amdgpu_device *adev = ip_block->adev;
 	int pipe, r;
 
-	adev->mes.hung_queue_db_array_size =
-		MES12_HUNG_DB_OFFSET_ARRAY_SIZE;
+	adev->mes.hung_queue_db_array_size = MES12_HUNG_DB_OFFSET_ARRAY_SIZE;
+	adev->mes.hung_queue_hqd_info_offset = MES12_HUNG_HQD_INFO_OFFSET;
+
 	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
 		r = amdgpu_mes_init_microcode(adev, pipe);
 		if (r)
-- cgit v1.2.3

From 277bb0f83e98261018ddd82b7ab8154bb9b93237 Mon Sep 17 00:00:00 2001
From: Jonathan Kim
Date: Thu, 5 Jun 2025 10:18:37 -0400
Subject: drm/amdgpu: enable suspend/resume all for gfx 12

Suspend/resume all gangs has been available for GFX12 for a while now, so
enable it.

Signed-off-by: Jonathan Kim
Acked-by: Alex Deucher
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 94973018f761..4883adcfbb4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -691,14 +691,11 @@ out:
 bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
 {
 	uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
-	bool is_supported = false;
 
-	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
-	    amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
-	    mes_rev >= 0x63)
-		is_supported = true;
-
-	return is_supported;
+	return ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
+		 amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
+		 mes_rev >= 0x63) ||
+		amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0));
 }
 
 /* Fix me -- node_id is used to identify the correct MES instances in the future */
-- cgit v1.2.3

From 079ae5118e1f0dcf5b1ab68ffdb5760b06ed79a2 Mon Sep 17 00:00:00 2001
From: Jonathan Kim
Date: Wed, 18 Jun 2025 10:31:15 -0400
Subject: drm/amdkfd: fix suspend/resume all calls in mes based eviction path

Suspend/resume all gangs should be done while the device lock is held.
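Reduced to its shape, the eviction path after this fix brackets the queue removal with an MES-wide suspend/resume inside the locked region (a sketch with error handling elided; the helpers are the existing ones visible in the diff below):

	dqm_lock(dqm);
	retval = suspend_all_queues_mes(dqm);	/* quiesce the MES first */
	/* ... mark queues evicted and remove_queue_mes() each of them ... */
	retval = resume_all_queues_mes(dqm);	/* restart MES scheduling */
	dqm_unlock(dqm);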
Signed-off-by: Jonathan Kim Acked-by: Alex Deucher Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 73 +++++++--------------- 1 file changed, 21 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 6c5c7c1bf5ed..6e7bc983fc0b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1209,6 +1209,15 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, pr_debug_ratelimited("Evicting process pid %d queues\n", pdd->process->lead_thread->pid); + if (dqm->dev->kfd->shared_resources.enable_mes) { + pdd->last_evict_timestamp = get_jiffies_64(); + retval = suspend_all_queues_mes(dqm); + if (retval) { + dev_err(dev, "Suspending all queues failed"); + goto out; + } + } + /* Mark all queues as evicted. Deactivate all active queues on * the qpd. */ @@ -1221,23 +1230,27 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, decrement_queue_count(dqm, qpd, q); if (dqm->dev->kfd->shared_resources.enable_mes) { - int err; - - err = remove_queue_mes(dqm, q, qpd); - if (err) { + retval = remove_queue_mes(dqm, q, qpd); + if (retval) { dev_err(dev, "Failed to evict queue %d\n", q->properties.queue_id); - retval = err; + goto out; } } } - pdd->last_evict_timestamp = get_jiffies_64(); - if (!dqm->dev->kfd->shared_resources.enable_mes) + + if (!dqm->dev->kfd->shared_resources.enable_mes) { + pdd->last_evict_timestamp = get_jiffies_64(); retval = execute_queues_cpsch(dqm, qpd->is_debug ? KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); + } else { + retval = resume_all_queues_mes(dqm); + if (retval) + dev_err(dev, "Resuming all queues failed"); + } out: dqm_unlock(dqm); @@ -3098,61 +3111,17 @@ out: return ret; } -static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm, - struct qcm_process_device *qpd) -{ - struct device *dev = dqm->dev->adev->dev; - int ret = 0; - - /* Check if process is already evicted */ - dqm_lock(dqm); - if (qpd->evicted) { - /* Increment the evicted count to make sure the - * process stays evicted before its terminated. 
- */ - qpd->evicted++; - dqm_unlock(dqm); - goto out; - } - dqm_unlock(dqm); - - ret = suspend_all_queues_mes(dqm); - if (ret) { - dev_err(dev, "Suspending all queues failed"); - goto out; - } - - ret = dqm->ops.evict_process_queues(dqm, qpd); - if (ret) { - dev_err(dev, "Evicting process queues failed"); - goto out; - } - - ret = resume_all_queues_mes(dqm); - if (ret) - dev_err(dev, "Resuming all queues failed"); - -out: - return ret; -} - int kfd_evict_process_device(struct kfd_process_device *pdd) { struct device_queue_manager *dqm; struct kfd_process *p; - int ret = 0; p = pdd->process; dqm = pdd->dev->dqm; WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); - if (dqm->dev->kfd->shared_resources.enable_mes) - ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd); - else - ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); - - return ret; + return dqm->ops.evict_process_queues(dqm, &pdd->qpd); } int reserve_debug_trap_vmid(struct device_queue_manager *dqm, -- cgit v1.2.3 From 927069c4ac2cd1a37efa468596fb5b8f86db9df0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Oct 2025 12:05:31 -0600 Subject: Revert "io_uring/rw: drop -EOPNOTSUPP check in __io_complete_rw_common()" This reverts commit 90bfb28d5fa8127a113a140c9791ea0b40ab156a. Kevin reports that this commit causes an issue for him with LVM snapshots, most likely because of turning off NOWAIT support while a snapshot is being created. This makes -EOPNOTSUPP bubble back through the completion handler, where io_uring read/write handling should just retry it. Reinstate the previous check removed by the referenced commit. Cc: stable@vger.kernel.org Fixes: 90bfb28d5fa8 ("io_uring/rw: drop -EOPNOTSUPP check in __io_complete_rw_common()") Reported-by: Salvatore Bonaccorso Reported-by: Kevin Lumik Link: https://lore.kernel.org/io-uring/cceb723c-051b-4de2-9a4c-4aa82e1619ee@kernel.dk/ Signed-off-by: Jens Axboe --- io_uring/rw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index 08882648d569..a0f9d2021e3f 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -542,7 +542,7 @@ static void __io_complete_rw_common(struct io_kiocb *req, long res) { if (res == req->cqe.res) return; - if (res == -EAGAIN && io_rw_should_reissue(req)) { + if ((res == -EOPNOTSUPP || res == -EAGAIN) && io_rw_should_reissue(req)) { req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE; } else { req_set_fail(req); -- cgit v1.2.3 From 8db4a1d146f83c6bdb0f5b98c50c509ae8549827 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 7 Oct 2025 13:39:05 -0400 Subject: NFSv4/flexfiles: fix to allocate mirror->dss before use Move mirror_array's dss_count initialization and dss allocation to ff_layout_alloc_mirror(), just before the loop that initializes each nfs4_ff_layout_ds_stripe's nfs_file_localio. Also handle NULL return from kcalloc() and remove one level of indent in ff_layout_alloc_mirror(). This commit fixes dangling nfsd_serv refcount issues seen when using NFS LOCALIO and then attempting to stop the NFSD service. 
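For context, the previous ordering looped over mirror->dss_count inside ff_layout_alloc_mirror() before that field had been assigned, so on a freshly kzalloc()ed mirror the loop never ran and the per-stripe nfs_file_localio structures were left uninitialized. A sketch of the broken shape (matching the old lines in the diff below):

	mirror = kzalloc(sizeof(*mirror), gfp_flags);	/* dss_count is 0 here */
	for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)	/* never iterates */
		nfs_localio_file_init(&mirror->dss[dss_id].nfl);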
Fixes: 20b1d75fb840 ("NFSv4/flexfiles: Add support for striped layouts") Signed-off-by: Mike Snitzer Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 35 ++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index df01d2876b68..9056f05a67dc 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -270,19 +270,31 @@ ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror) mirror->layout = NULL; } -static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) +static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(u32 dss_count, + gfp_t gfp_flags) { struct nfs4_ff_layout_mirror *mirror; - u32 dss_id; mirror = kzalloc(sizeof(*mirror), gfp_flags); - if (mirror != NULL) { - spin_lock_init(&mirror->lock); - refcount_set(&mirror->ref, 1); - INIT_LIST_HEAD(&mirror->mirrors); - for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) - nfs_localio_file_init(&mirror->dss[dss_id].nfl); + if (mirror == NULL) + return NULL; + + spin_lock_init(&mirror->lock); + refcount_set(&mirror->ref, 1); + INIT_LIST_HEAD(&mirror->mirrors); + + mirror->dss_count = dss_count; + mirror->dss = + kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe), + gfp_flags); + if (mirror->dss == NULL) { + kfree(mirror); + return NULL; } + + for (u32 dss_id = 0; dss_id < mirror->dss_count; dss_id++) + nfs_localio_file_init(&mirror->dss[dss_id].nfl); + return mirror; } @@ -507,17 +519,12 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, if (dss_count > 1 && stripe_unit == 0) goto out_err_free; - fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags); + fls->mirror_array[i] = ff_layout_alloc_mirror(dss_count, gfp_flags); if (fls->mirror_array[i] == NULL) { rc = -ENOMEM; goto out_err_free; } - fls->mirror_array[i]->dss_count = dss_count; - fls->mirror_array[i]->dss = - kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe), - gfp_flags); - for (dss_id = 0; dss_id < dss_count; dss_id++) { dss_info = &fls->mirror_array[i]->dss[dss_id]; dss_info->mirror = fls->mirror_array[i]; -- cgit v1.2.3 From 7a84394f02ab1985ebbe0a8d6f6d69bd040de4b3 Mon Sep 17 00:00:00 2001 From: Joshua Watt Date: Tue, 7 Oct 2025 15:22:58 -0600 Subject: NFS4: Apply delay_retrans to async operations The setting of delay_retrans is applied to synchronous RPC operations because the retransmit count is stored in same struct nfs4_exception that is passed each time an error is checked. However, for asynchronous operations (READ, WRITE, LOCKU, CLOSE, DELEGRETURN), a new struct nfs4_exception is made on the stack each time the task callback is invoked. This means that the retransmit count is always zero and thus delay_retrans never takes effect. Apply delay_retrans to these operations by tracking and updating their retransmit count. 
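Each affected operation follows the same round-trip pattern, sketched here (calldata stands for the operation's private data structure; the field names match the diff below):

	struct nfs4_exception exception = {
		/* ... */
		.retrans = calldata->retrans,	/* seed with earlier attempts */
	};

	task->tk_status = nfs4_async_handle_exception(task, server,
						      task->tk_status,
						      &exception);
	calldata->retrans = exception.retrans;	/* persist for the next retry */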
Change-Id: Ieb33e046c2b277cb979caa3faca7f52faf0568c9 Signed-off-by: Joshua Watt Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 13 +++++++++++++ include/linux/nfs_xdr.h | 1 + 2 files changed, 14 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f58098417142..411776718494 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3636,6 +3636,7 @@ struct nfs4_closedata { } lr; struct nfs_fattr fattr; unsigned long timestamp; + unsigned short retrans; }; static void nfs4_free_closedata(void *data) @@ -3664,6 +3665,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) .state = state, .inode = calldata->inode, .stateid = &calldata->arg.stateid, + .retrans = calldata->retrans, }; if (!nfs4_sequence_done(task, &calldata->res.seq_res)) @@ -3711,6 +3713,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) default: task->tk_status = nfs4_async_handle_exception(task, server, task->tk_status, &exception); + calldata->retrans = exception.retrans; if (exception.retry) goto out_restart; } @@ -5593,9 +5596,11 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) .inode = hdr->inode, .state = hdr->args.context->state, .stateid = &hdr->args.stateid, + .retrans = hdr->retrans, }; task->tk_status = nfs4_async_handle_exception(task, server, task->tk_status, &exception); + hdr->retrans = exception.retrans; if (exception.retry) { rpc_restart_call_prepare(task); return -EAGAIN; @@ -5709,10 +5714,12 @@ static int nfs4_write_done_cb(struct rpc_task *task, .inode = hdr->inode, .state = hdr->args.context->state, .stateid = &hdr->args.stateid, + .retrans = hdr->retrans, }; task->tk_status = nfs4_async_handle_exception(task, NFS_SERVER(inode), task->tk_status, &exception); + hdr->retrans = exception.retrans; if (exception.retry) { rpc_restart_call_prepare(task); return -EAGAIN; @@ -6726,6 +6733,7 @@ struct nfs4_delegreturndata { struct nfs_fh fh; nfs4_stateid stateid; unsigned long timestamp; + unsigned short retrans; struct { struct nfs4_layoutreturn_args arg; struct nfs4_layoutreturn_res res; @@ -6746,6 +6754,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) .inode = data->inode, .stateid = &data->stateid, .task_is_privileged = data->args.seq_args.sa_privileged, + .retrans = data->retrans, }; if (!nfs4_sequence_done(task, &data->res.seq_res)) @@ -6817,6 +6826,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) task->tk_status = nfs4_async_handle_exception(task, data->res.server, task->tk_status, &exception); + data->retrans = exception.retrans; if (exception.retry) goto out_restart; } @@ -7093,6 +7103,7 @@ struct nfs4_unlockdata { struct file_lock fl; struct nfs_server *server; unsigned long timestamp; + unsigned short retrans; }; static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, @@ -7147,6 +7158,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) struct nfs4_exception exception = { .inode = calldata->lsp->ls_state->inode, .stateid = &calldata->arg.stateid, + .retrans = calldata->retrans, }; if (!nfs4_sequence_done(task, &calldata->res.seq_res)) @@ -7180,6 +7192,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) task->tk_status = nfs4_async_handle_exception(task, calldata->server, task->tk_status, &exception); + calldata->retrans = exception.retrans; if (exception.retry) rpc_restart_call_prepare(task); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 
d56583572c98..31463286402f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1659,6 +1659,7 @@ struct nfs_pgio_header { void *netfs; #endif + unsigned short retrans; int pnfs_error; int error; /* merge with pnfs_error */ unsigned int good_bytes; /* boundary of good data */ -- cgit v1.2.3 From 9ff022f3820a31507cb93be6661bf5f3ca0609a4 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 9 Oct 2025 16:42:12 -0400 Subject: NFS: check if suid/sgid was cleared after a write as needed I noticed xfstests generic/193 and generic/355 started failing against knfsd after commit e7a8ebc305f2 ("NFSD: Offer write delegation for OPEN with OPEN4_SHARE_ACCESS_WRITE"). I ran those same tests against ONTAP (which has had write delegation support for a lot longer than knfsd) and they fail there too... so while it's a new failure against knfsd, it isn't an entirely new failure. Add the NFS_INO_REVAL_FORCED flag so that the presence of a delegation doesn't keep the inode from being revalidated to fetch the updated mode. Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0fb6905736d5..336c510f3750 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1535,7 +1535,8 @@ static int nfs_writeback_done(struct rpc_task *task, /* Deal with the suid/sgid bit corner case */ if (nfs_should_remove_suid(inode)) { spin_lock(&inode->i_lock); - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE + | NFS_INO_REVAL_FORCED); spin_unlock(&inode->i_lock); } return 0; -- cgit v1.2.3 From 9bb3baa9d1604cd20f49ae7dac9306b4037a0e7a Mon Sep 17 00:00:00 2001 From: Joshua Watt Date: Thu, 9 Oct 2025 15:48:04 -0600 Subject: NFS4: Fix state renewals missing after boot Since the last renewal time was initialized to 0 and jiffies start counting at -5 minutes, any clients connected in the first 5 minutes after a reboot would have their renewal timer set to a very long interval. If the connection was idle, this would result in the client state timing out on the server and the next call to the server would return NFS4ERR_BADSESSION. Fix this by initializing the last renewal time to the current jiffies instead of 0. Signed-off-by: Joshua Watt Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 6fddf43d729c..5998d6bd8a4f 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -222,6 +222,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; clp->cl_mig_gen = 1; + clp->cl_last_renewal = jiffies; #if IS_ENABLED(CONFIG_NFS_V4_1) init_waitqueue_head(&clp->cl_lock_waitq); #endif -- cgit v1.2.3 From 02e7567f5da023524476053a38c54f4f19130959 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Wed, 1 Oct 2025 14:03:37 +0800 Subject: cxl/port: Avoid missing port component registers setup port->nr_dports is used to represent how many dports added to the cxl port, it will increase in add_dport() when a new dport is being added to the cxl port, but it will not be reduced when a dport is removed from the cxl port. Currently, when the first dport is added to a cxl port, it will trigger component registers setup on the cxl port, the implementation is using port->nr_dports to confirm if the dport is the first dport. 
A corner case is that adding a dport could fail after port->nr_dports has
been updated but before port->nr_dports is checked for component registers
setup. If the failure happens while the first dport is being attached, the
CXL subsystem gets no chance to execute component registers setup for the
cxl port. The failure flow looks like this:

  port->nr_dports = 0

  dport 1 adding to the port:
    add_dport()               # port->nr_dports: 1
    failed on devm_add_action_or_reset() or sysfs_create_link()
    return error              # port->nr_dports: 1

  dport 2 adding to the port:
    add_dport()               # port->nr_dports: 2
    no failure
    skip component registers setup because port->nr_dports is 2

The solution is to move component registers setup closer to add_dport(), so
that if add_dport() is executed correctly for the first dport, component
registers setup on the port is executed immediately after that.

Fixes: f6ee24913de2 ("cxl: Move port register setup to when first dport appear")
Signed-off-by: Li Ming
Reviewed-by: Dave Jiang
Reviewed-by: Davidlohr Bueso
Reviewed-by: Jonathan Cameron
Signed-off-by: Dave Jiang
---
 drivers/cxl/core/port.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index d5f71eb1ade8..8128fd2b5b31 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1182,6 +1182,20 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
 	if (rc)
 		return ERR_PTR(rc);
 
+	/*
+	 * Setup port register if this is the first dport showed up. Having
+	 * a dport also means that there is at least 1 active link.
+	 */
+	if (port->nr_dports == 1 &&
+	    port->component_reg_phys != CXL_RESOURCE_NONE) {
+		rc = cxl_port_setup_regs(port, port->component_reg_phys);
+		if (rc) {
+			xa_erase(&port->dports, (unsigned long)dport->dport_dev);
+			return ERR_PTR(rc);
+		}
+		port->component_reg_phys = CXL_RESOURCE_NONE;
+	}
+
 	get_device(dport_dev);
 	rc = devm_add_action_or_reset(host, cxl_dport_remove, dport);
 	if (rc)
@@ -1200,18 +1214,6 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
 
 	cxl_debugfs_create_dport_dir(dport);
 
-	/*
-	 * Setup port register if this is the first dport showed up. Having
-	 * a dport also means that there is at least 1 active link.
-	 */
-	if (port->nr_dports == 1 &&
-	    port->component_reg_phys != CXL_RESOURCE_NONE) {
-		rc = cxl_port_setup_regs(port, port->component_reg_phys);
-		if (rc)
-			return ERR_PTR(rc);
-		port->component_reg_phys = CXL_RESOURCE_NONE;
-	}
-
 	return dport;
 }
-- cgit v1.2.3

From 2b929b6eec0c7c45eb554256d349c16c0ba1df3c Mon Sep 17 00:00:00 2001
From: Pauli Virtanen
Date: Mon, 13 Oct 2025 20:33:06 +0300
Subject: ALSA: usb-audio: add mixer_playback_min_mute quirk for Logitech H390

ID 046d:0a8f Logitech, Inc. H390 headset with microphone is reported to
have a muted minimum playback volume. Apply the quirk for that.
Link: https://gitlab.freedesktop.org/pipewire/pipewire/-/issues/4929 Signed-off-by: Pauli Virtanen Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 634cb4fb586f..43793a5c3f51 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2180,6 +2180,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_MIC_RES_384), DEVICE_FLG(0x046d, 0x09a4, /* Logitech QuickCam E 3500 */ QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR), + DEVICE_FLG(0x046d, 0x0a8f, /* Logitech H390 headset */ + QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE), DEVICE_FLG(0x0499, 0x1506, /* Yamaha THR5 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x0499, 0x1509, /* Steinberg UR22 */ -- cgit v1.2.3 From 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 10 Oct 2025 12:08:35 -0500 Subject: x86/resctrl: Fix miscount of bandwidth event when reactivating previously unavailable RMID Users can create as many monitoring groups as the number of RMIDs supported by the hardware. However, on AMD systems, only a limited number of RMIDs are guaranteed to be actively tracked by the hardware. RMIDs that exceed this limit are placed in an "Unavailable" state. When a bandwidth counter is read for such an RMID, the hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62). When such an RMID starts being tracked again the hardware counter is reset to zero. MSR_IA32_QM_CTR.Unavailable remains set on first read after tracking re-starts and is clear on all subsequent reads as long as the RMID is tracked. resctrl miscounts the bandwidth events after an RMID transitions from the "Unavailable" state back to being tracked. This happens because when the hardware starts counting again after resetting the counter to zero, resctrl in turn compares the new count against the counter value stored from the previous time the RMID was tracked. This results in resctrl computing an event value that is either undercounting (when new counter is more than stored counter) or a mistaken overflow (when new counter is less than stored counter). Reset the stored value (arch_mbm_state::prev_msr) of MSR_IA32_QM_CTR to zero whenever the RMID is in the "Unavailable" state to ensure accurate counting after the RMID resets to zero when it starts to be tracked again. Example scenario that results in mistaken overflow ================================================== 1. The resctrl filesystem is mounted, and a task is assigned to a monitoring group. $mount -t resctrl resctrl /sys/fs/resctrl $mkdir /sys/fs/resctrl/mon_groups/test1/ $echo 1234 > /sys/fs/resctrl/mon_groups/test1/tasks $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes 21323 <- Total bytes on domain 0 "Unavailable" <- Total bytes on domain 1 Task is running on domain 0. Counter on domain 1 is "Unavailable". 2. The task runs on domain 0 for a while and then moves to domain 1. The counter starts incrementing on domain 1. $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes 7345357 <- Total bytes on domain 0 4545 <- Total bytes on domain 1 3. At some point, the RMID in domain 0 transitions to the "Unavailable" state because the task is no longer executing in that domain. $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes "Unavailable" <- Total bytes on domain 0 434341 <- Total bytes on domain 1 4. 
Since the task continues to migrate between domains, it may eventually return to domain 0. $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes 17592178699059 <- Overflow on domain 0 3232332 <- Total bytes on domain 1 In this case, the RMID on domain 0 transitions from "Unavailable" state to active state. The hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62) when the counter is read and begins tracking the RMID counting from 0. Subsequent reads succeed but return a value smaller than the previously saved MSR value (7345357). Consequently, the resctrl's overflow logic is triggered, it compares the previous value (7345357) with the new, smaller value and incorrectly interprets this as a counter overflow, adding a large delta. In reality, this is a false positive: the counter did not overflow but was simply reset when the RMID transitioned from "Unavailable" back to active state. Here is the text from APM [1] available from [2]. "In PQOS Version 2.0 or higher, the MBM hardware will set the U bit on the first QM_CTR read when it begins tracking an RMID that it was not previously tracking. The U bit will be zero for all subsequent reads from that RMID while it is still tracked by the hardware. Therefore, a QM_CTR read with the U bit set when that RMID is in use by a processor can be considered 0 when calculating the difference with a subsequent read." [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41 section 19.3.3 Monitoring L3 Memory Bandwidth (MBM). [ bp: Split commit message into smaller paragraph chunks for better consumption. ] Fixes: 4d05bf71f157d ("x86/resctrl: Introduce AMD QOS feature") Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Reinette Chatre Cc: stable@vger.kernel.org # needs adjustments for <= v6.17 Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] --- arch/x86/kernel/cpu/resctrl/monitor.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index c8945610d455..2cd25a0d4637 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -242,7 +242,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *ignored) { + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); int cpu = cpumask_any(&d->hdr.cpu_mask); + struct arch_mbm_state *am; u64 msr_val; u32 prmid; int ret; @@ -251,12 +253,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, prmid = logical_rmid_to_physical_rmid(cpu, rmid); ret = __rmid_read_phys(prmid, eventid, &msr_val); - if (ret) - return ret; - *val = get_corrected_val(r, d, rmid, eventid, msr_val); + if (!ret) { + *val = get_corrected_val(r, d, rmid, eventid, msr_val); + } else if (ret == -EINVAL) { + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) + am->prev_msr = 0; + } - return 0; + return ret; } static int __cntr_id_read(u32 cntr_id, u64 *val) -- cgit v1.2.3 From c282993ccd97ad627d213645dc485086de034647 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Mon, 13 Oct 2025 19:10:22 +0900 Subject: can: remove false statement about 1:1 mapping between DLC and length The CAN-FD section of can.rst still states that there is a 1:1 mapping between the Classical CAN DLC and its length. This is only true for the DLC values up to 8. 
Beyond that point, the length remains at 8. For reference, the mapping
between the CAN DLC and the length is given in the table below [1]:

  DLC value   CBFF and CEFF   FBFF and FEFF
  [decimal]   [byte]          [byte]
  ----------------------------------------------
   0           0               0
   1           1               1
   2           2               2
   3           3               3
   4           4               4
   5           5               5
   6           6               6
   7           7               7
   8           8               8
   9           8              12
  10           8              16
  11           8              20
  12           8              24
  13           8              32
  14           8              48
  15           8              64

Remove the erroneous statement. Instead, just state that the length of a
Classical CAN frame ranges from 0 to 8.

[1] ISO 11898-1:2024, Table 5 -- DLC: coding of the four LSB

Signed-off-by: Vincent Mailhol
Link: https://patch.msgid.link/20251013-can-fd-doc-v2-1-5d53bdc8f2ad@kernel.org
Signed-off-by: Marc Kleine-Budde
---
 Documentation/networking/can.rst | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst
index 7650c4b5be5f..ccd321d29a8a 100644
--- a/Documentation/networking/can.rst
+++ b/Documentation/networking/can.rst
@@ -1398,10 +1398,9 @@ second bit timing has to be specified in order to enable the CAN FD bitrate.
 Additionally CAN FD capable CAN controllers support up to 64 bytes of
 payload. The representation of this length in can_frame.len and
 canfd_frame.len for userspace applications and inside the Linux network
-layer is a plain value from 0 .. 64 instead of the CAN 'data length code'.
-The data length code was a 1:1 mapping to the payload length in the Classical
-CAN frames anyway. The payload length to the bus-relevant DLC mapping is
-only performed inside the CAN drivers, preferably with the helper
+layer is a plain value from 0 .. 64 instead of the Classical CAN length
+which ranges from 0 to 8. The payload length to the bus-relevant DLC mapping
+is only performed inside the CAN drivers, preferably with the helper
 functions can_fd_dlc2len() and can_fd_len2dlc().
 
 The CAN netdevice driver capabilities can be distinguished by the network
-- cgit v1.2.3

From b5746b3e8ea4a8a4df776e0864322028d4f5e4b1 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol
Date: Mon, 13 Oct 2025 19:10:23 +0900
Subject: can: add Transmitter Delay Compensation (TDC) documentation

Back in 2021, support for CAN TDC was added to the kernel in series [1] and
in iproute2 in series [2]. However, the documentation was never updated.

Add a new sub-section under CAN-FD driver support to document how to
configure the TDC using the "ip" tool.
[1] add the netlink interface for CAN-FD Transmitter Delay Compensation (TDC)
Link: https://lore.kernel.org/all/20210918095637.20108-1-mailhol.vincent@wanadoo.fr/

[2] iplink_can: cleaning, fixes and adding TDC support
Link: https://lore.kernel.org/all/20211103164428.692722-1-mailhol.vincent@wanadoo.fr/

Signed-off-by: Vincent Mailhol
Link: https://patch.msgid.link/20251013-can-fd-doc-v2-2-5d53bdc8f2ad@kernel.org
Signed-off-by: Marc Kleine-Budde
---
 Documentation/networking/can.rst | 64 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst
index ccd321d29a8a..194e305ae973 100644
--- a/Documentation/networking/can.rst
+++ b/Documentation/networking/can.rst
@@ -1464,6 +1464,70 @@ Example when 'fd-non-iso on' is added on this switchable CAN FD adapter::
    can  state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0
 
+Transmitter Delay Compensation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+At high bit rates, the propagation delay from the TX pin to the RX pin of
+the transceiver might become greater than the actual bit time, causing
+measurement errors: the RX pin would still be measuring the previous bit.
+
+The Transmitter Delay Compensation (hereafter, TDC) resolves this problem
+by introducing a Secondary Sample Point (SSP) equal to the distance, in
+minimum time quantum, from the start of the bit time on the TX pin to the
+actual measurement on the RX pin. The SSP is calculated as the sum of two
+configurable values: the TDC Value (TDCV) and the TDC offset (TDCO).
+
+TDC, if supported by the device, can be configured together with CAN-FD
+using the ip tool's "tdc-mode" argument as follows:
+
+**omitted**
+        When no "tdc-mode" option is provided, the kernel will automatically
+        decide whether TDC should be turned on, in which case it will
+        calculate a default TDCO and use the TDCV as measured by the
+        device. This is the recommended method to use TDC.
+
+**"tdc-mode off"**
+        TDC is explicitly disabled.
+
+**"tdc-mode auto"**
+        The user must provide the "tdco" argument. The TDCV will be
+        automatically calculated by the device. This option is only
+        available if the device supports the TDC-AUTO CAN controller mode.
+
+**"tdc-mode manual"**
+        The user must provide both the "tdco" and "tdcv" arguments. This
+        option is only available if the device supports the TDC-MANUAL CAN
+        controller mode.
+
+Note that some devices may offer an additional parameter: "tdcf" (TDC Filter
+window). If supported by your device, this can be added as an optional
+argument to either "tdc-mode auto" or "tdc-mode manual".
+
+Example configuring a 500 kbit/s arbitration bitrate, a 4 Mbit/s data
+bitrate, a TDCO of 15 minimum time quantum and a TDCV automatically measured
+by the device::
+
+    $ ip link set can0 up type can bitrate 500000 \
+                                   fd on dbitrate 4000000 \
+                                   tdc-mode auto tdco 15
+    $ ip -details link show can0
+    5: can0: mtu 72 qdisc pfifo_fast state UP \
+       mode DEFAULT group default qlen 10
+    link/can  promiscuity 0 allmulti 0 minmtu 72 maxmtu 72
+    can  state ERROR-ACTIVE restart-ms 0
+          bitrate 500000 sample-point 0.875
+          tq 12 prop-seg 69 phase-seg1 70 phase-seg2 20 sjw 10 brp 1
+          ES582.1/ES584.1: tseg1 2..256 tseg2 2..128 sjw 1..128 brp 1..512 \
+          brp_inc 1
+          dbitrate 4000000 dsample-point 0.750
+          dtq 12 dprop-seg 7 dphase-seg1 7 dphase-seg2 5 dsjw 2 dbrp 1
+          tdco 15 tdcf 0
+          ES582.1/ES584.1: dtseg1 2..32 dtseg2 1..16 dsjw 1..8 dbrp 1..32 \
+          dbrp_inc 1
+          tdco 0..127 tdcf 0..127
+          clock 80000000
+
+
 Supported CAN Hardware
 ----------------------
-- cgit v1.2.3

From 93a27b5891b8194a8c083c9a80d2141d4bf47ba8 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa
Date: Sat, 27 Sep 2025 21:11:16 +0900
Subject: can: j1939: add missing calls in NETDEV_UNREGISTER notification handler

Currently the NETDEV_UNREGISTER event handler does not call
j1939_cancel_active_session() and j1939_sk_queue_drop_all(). This results
in these calls being skipped when j1939_sk_release() is called. And I guess
that the reason syzbot is still reporting

  unregister_netdevice: waiting for vcan0 to become free. Usage count = 2

is the lack of these calls.

Calling j1939_cancel_active_session(priv, sk) from j1939_sk_release() can
be covered by calling j1939_cancel_active_session(priv, NULL) from
j1939_netdev_notify(). Calling j1939_sk_queue_drop_all() from
j1939_sk_release() can be covered by calling
j1939_sk_netdev_event_netdown() from j1939_netdev_notify(). Therefore, we
can reuse j1939_cancel_active_session(priv, NULL) and
j1939_sk_netdev_event_netdown(priv) for the NETDEV_UNREGISTER event
handler.

Fixes: 7fcbe5b2c6a4 ("can: j1939: implement NETDEV_UNREGISTER notification handler")
Signed-off-by: Tetsuo Handa
Tested-by: Oleksij Rempel
Acked-by: Oleksij Rempel
Link: https://patch.msgid.link/3ad3c7f8-5a74-4b07-a193-cb0725823558@I-love.SAKURA.ne.jp
Signed-off-by: Marc Kleine-Budde
---
 net/can/j1939/main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index 3706a872ecaf..a93af55df5fd 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -378,6 +378,8 @@ static int j1939_netdev_notify(struct notifier_block *nb,
 		j1939_ecu_unmap_all(priv);
 		break;
 	case NETDEV_UNREGISTER:
+		j1939_cancel_active_session(priv, NULL);
+		j1939_sk_netdev_event_netdown(priv);
 		j1939_sk_netdev_event_unregister(priv);
 		break;
 	}
-- cgit v1.2.3

From 604be9dad8f6a25a7afa898e29c6b6b4f9243200 Mon Sep 17 00:00:00 2001
From: Shuicheng Lin
Date: Mon, 6 Oct 2025 15:13:18 +0000
Subject: drm/xe: Fix comments in xe_gt struct

Correct several spelling and grammar issues in the xe_gt struct
documentation to improve readability:
- Fix "to not" -> "do not".
- Fix "mmigrations" -> "migrations".
- Fix "Multiple queues exists" -> "Multiple queues exist".
- Fix "are be processed" -> "to be processed".
- Fix "have being processed" -> "have been processed".

These changes are purely cosmetic and do not affect functionality.

v2: drop kernel-doc formatting change.
(Jani) Cc: Jani Nikula Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20251006151317.2553182-2-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_gt_types.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 66158105aca5..8b5f604d7883 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -202,14 +202,14 @@ struct xe_gt { /** * @usm.bb_pool: Pool from which batchbuffers, for USM operations * (e.g. migrations, fixing page tables), are allocated. - * Dedicated pool needed so USM operations to not get blocked + * Dedicated pool needed so USM operations do not get blocked * behind any user operations which may have resulted in a * fault. */ struct xe_sa_manager *bb_pool; /** * @usm.reserved_bcs_instance: reserved BCS instance used for USM - * operations (e.g. mmigrations, fixing page tables) + * operations (e.g. migrations, fixing page tables) */ u16 reserved_bcs_instance; /** @usm.pf_wq: page fault work queue, unbound, high priority */ @@ -220,8 +220,8 @@ struct xe_gt { * @usm.pf_queue: Page fault queue used to sync faults so faults can * be processed not under the GuC CT lock. The queue is sized so * it can sync all possible faults (1 per physical engine). - * Multiple queues exists for page faults from different VMs are - * be processed in parallel. + * Multiple queues exist for page faults from different VMs to be + * processed in parallel. */ struct pf_queue { /** @usm.pf_queue.gt: back pointer to GT */ @@ -387,7 +387,7 @@ struct xe_gt { /** * @wa_active.oob_initialized: mark oob as initialized to help * detecting misuse of XE_GT_WA() - it can only be called on - * initialization after OOB WAs have being processed + * initialization after OOB WAs have been processed */ bool oob_initialized; } wa_active; -- cgit v1.2.3 From 55991d854f65d58cfe2c7d5219ffbd83d07c2577 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Sun, 12 Oct 2025 20:45:54 -0700 Subject: drm/xe: Fix build_pt_update_batch_sram for non-4K PAGE_SIZE The build_pt_update_batch_sram function in the Xe migrate layer assumes PAGE_SIZE == XE_PAGE_SIZE (4K), which is not a valid assumption on non-x86 platforms. This patch updates build_pt_update_batch_sram to correctly handle PAGE_SIZE > 4K by programming multiple 4K GPU pages per CPU page. 
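The core of the change is the page-size relationship, sketched below (illustrative only; emit_pte_for() is a made-up stand-in for the MI_STORE_DATA_IMM emission in the real function):

	/* With PAGE_SIZE == 64K and XE_PAGE_SIZE == 4K, one CPU page backs
	 * PAGE_SIZE / XE_PAGE_SIZE == 16 GPU PTEs, each advancing by
	 * XE_PAGE_SIZE within the same CPU page.
	 */
	for (u32 n = 0; n < PAGE_SIZE / XE_PAGE_SIZE; n++)
		emit_pte_for(addr + n * XE_PAGE_SIZE);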
v5: - Mask off non-address bits during compare Signed-off-by: Matthew Brost Tested-by: Simon Richter Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20251013034555.4121168-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 7345a5b65169..216fc0ec2bb7 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1781,13 +1781,15 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, u32 size) { u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB]; + u64 gpu_page_size = 0x1ull << xe_pt_shift(0); u32 ptes; int i = 0; - ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); + ptes = DIV_ROUND_UP(size, gpu_page_size); while (ptes) { u32 chunk = min(MAX_PTE_PER_SDI, ptes); + chunk = ALIGN_DOWN(chunk, PAGE_SIZE / XE_PAGE_SIZE); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = pt_offset; bb->cs[bb->len++] = 0; @@ -1796,18 +1798,30 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes -= chunk; while (chunk--) { - u64 addr = sram_addr[i].addr & PAGE_MASK; + u64 addr = sram_addr[i].addr & ~(gpu_page_size - 1); + u64 pte, orig_addr = addr; xe_tile_assert(m->tile, sram_addr[i].proto == DRM_INTERCONNECT_SYSTEM); xe_tile_assert(m->tile, addr); - addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, - addr, pat_index, - 0, false, 0); - bb->cs[bb->len++] = lower_32_bits(addr); - bb->cs[bb->len++] = upper_32_bits(addr); - i++; +again: + pte = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, + addr, pat_index, + 0, false, 0); + bb->cs[bb->len++] = lower_32_bits(pte); + bb->cs[bb->len++] = upper_32_bits(pte); + + if (gpu_page_size < PAGE_SIZE) { + addr += XE_PAGE_SIZE; + if (orig_addr + PAGE_SIZE != addr) { + chunk--; + goto again; + } + i++; + } else { + i += gpu_page_size / PAGE_SIZE; + } } } } -- cgit v1.2.3 From dd83b101a4a65c212bacc52dea3b0b7131a2e88a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Sun, 12 Oct 2025 20:45:55 -0700 Subject: drm/xe: Enable 2M pages in xe_migrate_vram Using 2M pages in xe_migrate_vram has two benefits: we issue fewer instructions per 2M copy (1 vs. 512), and the cache hit rate should be higher. This results in increased copy engine bandwidth, as shown by benchmark IGTs. Enable 2M pages by reserving PDEs in the migrate VM and using 2M pages in xe_migrate_vram if the DMA address order matches 2M. v2: - Reuse build_pt_update_batch_sram (Stuart) - Fix build_pt_update_batch_sram for PAGE_SIZE > 4K v3: - More fixes for PAGE_SIZE > 4K, align chunk, decrement chunk as needed - Use stack incr var in xe_migrate_vram_use_pde (Stuart) v4: - Split PAGE_SIZE > 4K fix out in different patch (Stuart) Signed-off-by: Matthew Brost Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20251013034555.4121168-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 53 ++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 216fc0ec2bb7..4ca48dd1cfd8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -57,6 +57,13 @@ struct xe_migrate { u64 usm_batch_base_ofs; /** @cleared_mem_ofs: VM offset of @cleared_bo. 
*/ u64 cleared_mem_ofs; + /** @large_page_copy_ofs: VM offset of 2M pages used for large copies */ + u64 large_page_copy_ofs; + /** + * @large_page_copy_pdes: BO offset to writeout 2M pages (PDEs) used for + * large copies + */ + u64 large_page_copy_pdes; /** * @fence: dma-fence representing the last migration job batch. * Protected by @job_mutex. @@ -288,6 +295,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, (i + 1) * 8, u64, entry); } + /* Reserve 2M PDEs */ + level = 1; + m->large_page_copy_ofs = NUM_PT_SLOTS << xe_pt_shift(level); + m->large_page_copy_pdes = map_ofs + XE_PAGE_SIZE * level + + NUM_PT_SLOTS * 8; + /* Set up a 1GiB NULL mapping at 255GiB offset. */ level = 2; xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64, @@ -1778,10 +1791,10 @@ static u32 pte_update_cmd_size(u64 size) static void build_pt_update_batch_sram(struct xe_migrate *m, struct xe_bb *bb, u32 pt_offset, struct drm_pagemap_addr *sram_addr, - u32 size) + u32 size, int level) { u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB]; - u64 gpu_page_size = 0x1ull << xe_pt_shift(0); + u64 gpu_page_size = 0x1ull << xe_pt_shift(level); u32 ptes; int i = 0; @@ -1808,7 +1821,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, again: pte = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, addr, pat_index, - 0, false, 0); + level, false, 0); bb->cs[bb->len++] = lower_32_bits(pte); bb->cs[bb->len++] = upper_32_bits(pte); @@ -1826,6 +1839,19 @@ again: } } +static bool xe_migrate_vram_use_pde(struct drm_pagemap_addr *sram_addr, + unsigned long size) +{ + u32 large_size = (0x1 << xe_pt_shift(1)); + unsigned long i, incr = large_size / PAGE_SIZE; + + for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE); i += incr) + if (PAGE_SIZE << sram_addr[i].order != large_size) + return false; + + return true; +} + enum xe_migrate_copy_dir { XE_MIGRATE_COPY_TO_VRAM, XE_MIGRATE_COPY_TO_SRAM, @@ -1855,6 +1881,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, PAGE_SIZE : 4; int err; unsigned long i, j; + bool use_pde = xe_migrate_vram_use_pde(sram_addr, len + sram_offset); if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) || (sram_offset | vram_addr) & XE_CACHELINE_MASK)) @@ -1879,7 +1906,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, * struct drm_pagemap_addr. Ensure this is the case even with higher * orders. 
*/ - for (i = 0; i < npages;) { + for (i = 0; !use_pde && i < npages;) { unsigned int order = sram_addr[i].order; for (j = 1; j < NR_PAGES(order) && i + j < npages; j++) @@ -1889,16 +1916,26 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, i += NR_PAGES(order); } - build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE, - sram_addr, len + sram_offset); + if (use_pde) + build_pt_update_batch_sram(m, bb, m->large_page_copy_pdes, + sram_addr, len + sram_offset, 1); + else + build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE, + sram_addr, len + sram_offset, 0); if (dir == XE_MIGRATE_COPY_TO_VRAM) { - src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; + if (use_pde) + src_L0_ofs = m->large_page_copy_ofs + sram_offset; + else + src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false); } else { src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false); - dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; + if (use_pde) + dst_L0_ofs = m->large_page_copy_ofs + sram_offset; + else + dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; } bb->cs[bb->len++] = MI_BATCH_BUFFER_END; -- cgit v1.2.3 From e5ae8d1eb08a3e27fff4ae264af4c8056d908639 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 12 Sep 2025 15:31:45 -0700 Subject: drm/xe: Increase global invalidation timeout to 1000us The previous timeout of 500us seems to be too small; panning the map in the Roll20 VTT in Firefox on a KDE/Wayland desktop reliably triggered timeouts within a few seconds of usage, causing the monitor to freeze and the following to be printed to dmesg: [Jul30 13:44] xe 0000:03:00.0: [drm] *ERROR* GT0: Global invalidation timeout [Jul30 13:48] xe 0000:03:00.0: [drm] *ERROR* [CRTC:82:pipe A] flip_done timed out I haven't hit a single timeout since increasing it to 1000us even after several multi-hour testing sessions. Fixes: 0dd2dd0182bc ("drm/xe: Move DSB l2 flush to a more sensible place") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5710 Signed-off-by: Kenneth Graunke Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Reviewed-by: Shuicheng Lin Link: https://lore.kernel.org/r/20250912223254.147940-1-kenneth@whitecape.org Signed-off-by: Lucas De Marchi (cherry picked from commit 146046907b56578263434107f5a7d5051847c459) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 2883b39c9b37..34d33965eac2 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -1070,7 +1070,7 @@ void xe_device_l2_flush(struct xe_device *xe) spin_lock(>->global_invl_lock); xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); - if (xe_mmio_wait32(>->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) + if (xe_mmio_wait32(>->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 1000, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); spin_unlock(>->global_invl_lock); -- cgit v1.2.3 From 7ac74613e5f2ef3450f44fd2127198662c2563a9 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Oct 2025 04:06:18 -0700 Subject: drm/xe: Don't allow evicting of BOs in same VM in array of VM binds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An array of VM binds can potentially evict other buffer objects (BOs) within the same VM under certain conditions, which may lead to NULL pointer dereferences later in the bind pipeline. 
To prevent this, clear the allow_res_evict flag in the xe_bo_validate call. v2: - Invert polarity of no_res_evict (Thomas) - Add comment in code explaining issue (Thomas) Cc: stable@vger.kernel.org Reported-by: Paulo Zanoni Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6268 Fixes: 774b5fa509a9 ("drm/xe: Avoid evicting object of the same vm in none fault mode") Fixes: 77f2ef3f16f5 ("drm/xe: Lock all gpuva ops during VM bind IOCTL") Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Tested-by: Paulo Zanoni Reviewed-by: Thomas Hellström Link: https://lore.kernel.org/r/20251009110618.3481870-1-matthew.brost@intel.com (cherry picked from commit 8b9ba8d6d95fe75fed6b0480bb03da4b321bea08) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_vm.c | 32 +++++++++++++++++++++++--------- drivers/gpu/drm/xe/xe_vm_types.h | 1 + 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 027e6ce648c5..f602b874e054 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2832,7 +2832,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, } static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, - bool validate) + bool res_evict, bool validate) { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); @@ -2843,7 +2843,8 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, err = drm_exec_lock_obj(exec, &bo->ttm.base); if (!err && validate) err = xe_bo_validate(bo, vm, - !xe_vm_in_preempt_fence_mode(vm), exec); + !xe_vm_in_preempt_fence_mode(vm) && + res_evict, exec); } return err; @@ -2913,14 +2914,23 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) } static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, - struct xe_vma_op *op) + struct xe_vma_ops *vops, struct xe_vma_op *op) { int err = 0; + bool res_evict; + + /* + * We only allow evicting a BO within the VM if it is not part of an + * array of binds, as an array of binds can evict another BO within the + * bind. 
+ */ + res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); switch (op->base.op) { case DRM_GPUVA_OP_MAP: if (!op->map.invalidate_on_bind) err = vma_lock_and_validate(exec, op->map.vma, + res_evict, !xe_vm_in_fault_mode(vm) || op->map.immediate); break; @@ -2931,11 +2941,13 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.remap.unmap->va), - false); + res_evict, false); if (!err && op->remap.prev) - err = vma_lock_and_validate(exec, op->remap.prev, true); + err = vma_lock_and_validate(exec, op->remap.prev, + res_evict, true); if (!err && op->remap.next) - err = vma_lock_and_validate(exec, op->remap.next, true); + err = vma_lock_and_validate(exec, op->remap.next, + res_evict, true); break; case DRM_GPUVA_OP_UNMAP: err = check_ufence(gpuva_to_vma(op->base.unmap.va)); @@ -2944,7 +2956,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.unmap.va), - false); + res_evict, false); break; case DRM_GPUVA_OP_PREFETCH: { @@ -2959,7 +2971,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.prefetch.va), - false); + res_evict, false); if (!err && !xe_vma_has_no_bo(vma)) err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region], @@ -3005,7 +3017,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, return err; list_for_each_entry(op, &vops->list, link) { - err = op_lock_and_prep(exec, vm, op); + err = op_lock_and_prep(exec, vm, vops, op); if (err) return err; } @@ -3638,6 +3650,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); + if (args->num_binds > 1) + vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS; for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index da39940501d8..413353e1c225 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -476,6 +476,7 @@ struct xe_vma_ops { /** @flag: signify the properties within xe_vma_ops*/ #define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0) #define XE_VMA_OPS_FLAG_MADVISE BIT(1) +#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2) u32 flags; #ifdef TEST_VM_OPS_ERROR /** @inject_error: inject error to test error handling */ -- cgit v1.2.3 From d30203739be798d3de5c84db3060e96f00c54e82 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 18 Sep 2025 13:58:57 -0700 Subject: drm/xe: Move rebar to be done earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There may be cases in which the BAR0 also needs to move to accommodate the bigger BAR2. However if it's not released, the BAR2 resize fails. During the vram probe it can't be released as it's already in use by xe_mmio for early register access. Add a new function in xe_vram and let xe_pci call it directly before even early device probe. 
This allows the BAR2 to resize in cases where BAR0 also needs to move, assuming there aren't other reasons to hold that move: [] xe 0000:03:00.0: vgaarb: deactivate vga console [] xe 0000:03:00.0: [drm] Attempting to resize bar from 8192MiB -> 16384MiB [] xe 0000:03:00.0: BAR 0 [mem 0x83000000-0x83ffffff 64bit]: releasing [] xe 0000:03:00.0: BAR 2 [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] xe 0000:03:00.0: BAR 2 [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] xe 0000:03:00.0: BAR 0 [mem 0x83000000-0x83ffffff 64bit]: assigned [] pcieport 0000:00:01.0: PCI bridge to [bus 01-04] [] pcieport 0000:00:01.0: bridge window [mem 0x83000000-0x840fffff] [] pcieport 0000:00:01.0: bridge window [mem 0x4000000000-0x44007fffff 64bit pref] [] pcieport 0000:01:00.0: PCI bridge to [bus 02-04] [] pcieport 0000:01:00.0: bridge window [mem 0x83000000-0x840fffff] [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref] [] pcieport 0000:02:01.0: PCI bridge to [bus 03] [] pcieport 0000:02:01.0: bridge window [mem 0x83000000-0x83ffffff] [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref] [] xe 0000:03:00.0: [drm] BAR2 resized to 16384M [] xe 0000:03:00.0: [drm:xe_pci_probe [xe]] BATTLEMAGE e221:0000 dgfx:1 gfx:Xe2_HPG (20.02) ... For BMG there are additional fixes needed on the PCI side, but this helps get it to a working resize. All the rebar logic is more pci-specific than xe-specific and can be done very early in the probe sequence. In the future it would be good to move it out of xe_vram.c, but this refactor is left for later. Cc: Ilpo Järvinen Cc: stable@vger.kernel.org # 6.12+ Link: https://lore.kernel.org/intel-xe/fafda2a3-fc63-ce97-d22b-803f771a4d19@linux.intel.com Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250918-xe-pci-rebar-2-v1-2-6c094702a074@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 45e33f220fd625492c11e15733d8e9b4f9db82a4) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 2 ++ drivers/gpu/drm/xe/xe_vram.c | 34 ++++++++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_vram.h | 1 + 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index be91343829dd..9a6df79fc5b6 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -867,6 +867,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; + xe_vram_resize_bar(xe); + err = xe_device_probe_early(xe); /* * In Boot Survivability mode, no drm card is exposed and driver diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index b44ebf50fedb..652df7a5f4f6 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -26,15 +26,35 @@ #define BAR_SIZE_SHIFT 20 -static void -_resize_bar(struct xe_device *xe, int resno, resource_size_t size) +/* + * Release all the BARs that could influence/block LMEMBAR resizing, i.e.
+ * assigned IORESOURCE_MEM_64 BARs + */ +static void release_bars(struct pci_dev *pdev) +{ + struct resource *res; + int i; + + pci_dev_for_each_resource(pdev, res, i) { + /* Resource already un-assigned, do not reset it */ + if (!res->parent) + continue; + + /* No need to release unrelated BARs */ + if (!(res->flags & IORESOURCE_MEM_64)) + continue; + + pci_release_resource(pdev, i); + } +} + +static void resize_bar(struct xe_device *xe, int resno, resource_size_t size) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); int bar_size = pci_rebar_bytes_to_size(size); int ret; - if (pci_resource_len(pdev, resno)) - pci_release_resource(pdev, resno); + release_bars(pdev); ret = pci_resize_resource(pdev, resno, bar_size); if (ret) { @@ -50,7 +70,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) * if force_vram_bar_size is set, attempt to set to the requested size * else set to maximum possible size */ -static void resize_vram_bar(struct xe_device *xe) +void xe_vram_resize_bar(struct xe_device *xe) { int force_vram_bar_size = xe_modparam.force_vram_bar_size; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -119,7 +139,7 @@ static void resize_vram_bar(struct xe_device *xe) pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); - _resize_bar(xe, LMEM_BAR, rebar_size); + resize_bar(xe, LMEM_BAR, rebar_size); pci_assign_unassigned_bus_resources(pdev->bus); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); @@ -148,8 +168,6 @@ static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region * return -ENXIO; } - resize_vram_bar(xe); - lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR); lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR); if (!lmem_bar->io_size) diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h index 72860f714fc6..13505cfb184d 100644 --- a/drivers/gpu/drm/xe/xe_vram.h +++ b/drivers/gpu/drm/xe/xe_vram.h @@ -11,6 +11,7 @@ struct xe_device; struct xe_vram_region; +void xe_vram_resize_bar(struct xe_device *xe); int xe_vram_probe(struct xe_device *xe); struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement); -- cgit v1.2.3 From 1117e7d1e8e66bf7e40291178b829a8513f83a7a Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 10 Sep 2025 18:09:39 +0200 Subject: drm/xe/migrate: Fix an error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The exhaustive eviction accidentally changed an error path goto to a return. Fix this.
Fixes: 59eabff2a352 ("drm/xe: Convert xe_bo_create_pin_map() for exhaustive eviction") Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Francois Dugast Link: https://lore.kernel.org/r/20250910160939.103473-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 381f1ed15159c4b3f00dd37cc70924dedebeb111) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 1d667fa36cf3..569869a2b339 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -434,7 +434,7 @@ int xe_migrate_init(struct xe_migrate *m) err = xe_migrate_lock_prepare_vm(tile, m, vm); if (err) - return err; + goto err_out; if (xe->info.has_usm) { struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt, -- cgit v1.2.3 From 7413e9f2be6b2b0caff9c517efa123d988914bba Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Oct 2025 06:06:29 -0700 Subject: drm/xe: Handle mixed mappings and existing VRAM on atomic faults Moving to VRAM will fail if mixed mappings are present or if the page is already located in VRAM. Atomic faults that require a move to VRAM currently retry without attempting to evict mixed mappings or locate existing VRAM mappings. This patch fixes the issue by attempting to evict mixed mappings or find existing VRAM pages when a move to VRAM fails during atomic fault handling. Fixes: a9ac0fa455b0 ("drm/xe: Strict migration policy for atomic SVM faults") Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20251009130629.3531962-1-matthew.brost@intel.com (cherry picked from commit 75188605c56d10c1bd3b1cd94f4872f349c3a9c8) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 7e2db71ff34e..b268ee0d2271 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -1073,7 +1073,17 @@ retry: drm_dbg(&vm->xe->drm, "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n", vm->usm.asid, ERR_PTR(err)); - goto retry; + + /* + * In the devmem-only case, mixed mappings may + * be found. The get_pages function will fix + * these up to a single location, allowing the + * page fault handler to make forward progress. + */ + if (ctx.devmem_only) + goto get_pages; + else + goto retry; } else { drm_err(&vm->xe->drm, "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n", @@ -1083,6 +1093,7 @@ retry: } } +get_pages: get_pages_start = xe_svm_stats_ktime_get(); range_debug(range, "GET PAGES"); -- cgit v1.2.3 From 1852d27aa998272696680607b65a2ceac966104e Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 9 Oct 2025 18:10:47 -0700 Subject: drm/xe: Enable media sampler power gating Where applicable, enable media sampler power gating. Also, add it to the powergate_info debugfs. v2: Remove the sampler powergate status since it is cleared quickly anyway. 
v3: Use vcs mask (Rodrigo) and fix the version check for media v4: Remove extra spaces v5: Media samplers are independent of vcs mask, use Media version 1255 (Matt Roper) Fixes: 38e8c4184ea0 ("drm/xe: Enable Coarse Power Gating") Cc: Rodrigo Vivi Cc: Matt Roper Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Link: https://lore.kernel.org/r/20251010011047.2047584-1-vinay.belgaumkar@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 4cbc08649a54c3d533df9832342d52d409dfbbf0) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_gt_idle.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 06cb6b02ec64..51f2a03847f9 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -342,6 +342,7 @@ #define POWERGATE_ENABLE XE_REG(0xa210) #define RENDER_POWERGATE_ENABLE REG_BIT(0) #define MEDIA_POWERGATE_ENABLE REG_BIT(1) +#define MEDIA_SAMPLERS_POWERGATE_ENABLE REG_BIT(2) #define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n)) #define VDN_MFXVDENC_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n)) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index f8950a52d0a4..bdc9d9877ec4 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -124,6 +124,9 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) if (xe_gt_is_main_type(gt)) gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; + if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255) + gtidle->powergate_enable |= MEDIA_SAMPLERS_POWERGATE_ENABLE; + if (xe->info.platform != XE_DG1) { for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { if ((gt->info.engine_mask & BIT(i))) @@ -246,6 +249,11 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n, str_up_down(pg_status & media_slices[n].status_bit)); } + + if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255) + drm_printf(p, "Media Samplers Power Gating Enabled: %s\n", + str_yes_no(pg_enabled & MEDIA_SAMPLERS_POWERGATE_ENABLE)); + return 0; } -- cgit v1.2.3 From 9f64b3cd051b825de0a2a9f145c8e003200cedd5 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 10 Oct 2025 17:25:29 +0000 Subject: drm/xe/guc: Check GuC running state before deregistering exec queue In normal operation, a registered exec queue is disabled and deregistered through the GuC, and freed only after the GuC confirms completion. However, if the driver is forced to unbind while the exec queue is still running, the user may call exec_destroy() after the GuC has already been stopped and CT communication disabled. In this case, the driver cannot receive a response from the GuC, preventing proper cleanup of exec queue resources. Fix this by directly releasing the resources when GuC is not running. Here is the failure dmesg log: " [ 468.089581] ---[ end trace 0000000000000000 ]--- [ 468.089608] pci 0000:03:00.0: [drm] *ERROR* GT0: GUC ID manager unclean (1/65535) [ 468.090558] pci 0000:03:00.0: [drm] GT0: total 65535 [ 468.090562] pci 0000:03:00.0: [drm] GT0: used 1 [ 468.090564] pci 0000:03:00.0: [drm] GT0: range 1..1 (1) [ 468.092716] ------------[ cut here ]------------ [ 468.092719] WARNING: CPU: 14 PID: 4775 at drivers/gpu/drm/xe/xe_ttm_vram_mgr.c:298 ttm_vram_mgr_fini+0xf8/0x130 [xe] " v2: use xe_uc_fw_is_running() instead of xe_guc_ct_enabled(). As CT may go down and come back during VF migration. 
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20251010172529.2967639-2-shuicheng.lin@intel.com (cherry picked from commit 9b42321a02c50a12b2beb6ae9469606257fbecea) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_submit.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 53024eb5670b..94ed8159496f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -44,6 +44,7 @@ #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_trace.h" +#include "xe_uc_fw.h" #include "xe_vm.h" static struct xe_guc * @@ -1489,7 +1490,17 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); trace_xe_exec_queue_cleanup_entity(q); - if (exec_queue_registered(q)) + /* + * Expected state transitions for cleanup: + * - If the exec queue is registered and GuC firmware is running, we must first + * disable scheduling and deregister the queue to ensure proper teardown and + * resource release in the GuC, then destroy the exec queue on driver side. + * - If the GuC is already stopped (e.g., during driver unload or GPU reset), + * we cannot expect a response for the deregister request. In this case, + * it is safe to directly destroy the exec queue on driver side, as the GuC + * will not process further requests and all resources must be cleaned up locally. + */ + if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) disable_scheduling_deregister(guc, q); else __guc_exec_queue_destroy(guc, q); -- cgit v1.2.3 From 7e5a5983edda664e8e4bb20af17b80f5135c655c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 24 Sep 2025 16:10:38 +0100 Subject: btrfs: fix clearing of BTRFS_FS_RELOC_RUNNING if relocation already running When starting relocation, at reloc_chunk_start(), if we happen to find the flag BTRFS_FS_RELOC_RUNNING is already set we return an error (-EINPROGRESS) to the callers, however the callers call reloc_chunk_end() which will clear the flag BTRFS_FS_RELOC_RUNNING, which is wrong since relocation was started by another task and still running. Finding the BTRFS_FS_RELOC_RUNNING flag already set is an unexpected scenario, but still our current behaviour is not correct. Fix this by never calling reloc_chunk_end() if reloc_chunk_start() has returned an error, which is what logically makes sense, since the general widespread pattern is to have end functions called only if the counterpart start functions succeeded. This requires changing reloc_chunk_start() to clear BTRFS_FS_RELOC_RUNNING if there's a pending cancel request. Fixes: 907d2710d727 ("btrfs: add cancellable chunk relocation support") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Boris Burkov Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 8dd8de6b9fb8..0765e06d00b8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3780,6 +3780,7 @@ out: /* * Mark start of chunk relocation that is cancellable. 
Check if the cancellation has been requested meanwhile and don't start in that case. + * NOTE: if this returns an error, reloc_chunk_end() must not be called. * * Return: * 0 success @@ -3796,10 +3797,8 @@ static int reloc_chunk_start(struct btrfs_fs_info *fs_info) if (atomic_read(&fs_info->reloc_cancel_req) > 0) { btrfs_info(fs_info, "chunk relocation canceled on start"); - /* - * On cancel, clear all requests but let the caller mark - * the end after cleanup operations. - */ + /* On cancel, clear all requests. */ + clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags); atomic_set(&fs_info->reloc_cancel_req, 0); return -ECANCELED; } @@ -3808,9 +3807,11 @@ /* * Mark end of chunk relocation that is cancellable and wake any waiters. + * NOTE: call only if a previous call to reloc_chunk_start() succeeded. */ static void reloc_chunk_end(struct btrfs_fs_info *fs_info) { + ASSERT(test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)); /* Requested after start, clear bit first so any waiters can continue */ if (atomic_read(&fs_info->reloc_cancel_req) > 0) btrfs_info(fs_info, "chunk relocation canceled during operation"); @@ -4023,9 +4024,9 @@ out: if (err && rw) btrfs_dec_block_group_ro(rc->block_group); iput(rc->data_inode); + reloc_chunk_end(fs_info); out_put_bg: btrfs_put_block_group(bg); - reloc_chunk_end(fs_info); free_reloc_control(rc); return err; } @@ -4208,8 +4209,8 @@ out_clean: ret = ret2; out_unset: unset_reloc_control(rc); -out_end: reloc_chunk_end(fs_info); +out_end: free_reloc_control(rc); out: free_reloc_roots(&reloc_roots); -- cgit v1.2.3 From 53a4acbfc1de85fa637521ffab4f4e2ee03cbeeb Mon Sep 17 00:00:00 2001 From: Miquel Sabaté Solà Date: Thu, 25 Sep 2025 20:41:39 +0200 Subject: btrfs: fix memory leak on duplicated memory in the qgroup assign ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 'btrfs_ioctl_qgroup_assign' we first duplicate the argument as provided by the user, which is kfree'd in the end. But this was not the case when allocating memory for 'prealloc': if that allocation somehow failed, the previous code would go directly into calling 'mnt_drop_write_file', without freeing the string duplicated from user space. Fixes: 4addc1ffd67a ("btrfs: qgroup: preallocate memory before adding a relation") CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Boris Burkov Reviewed-by: Filipe Manana Signed-off-by: Miquel Sabaté Solà Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a454b5ba2097..938286bee6a8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3740,7 +3740,7 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); if (!prealloc) { ret = -ENOMEM; - goto drop_write; + goto out; } } -- cgit v1.2.3 From b7fdfd29a136a17c5c8ad9e9bbf89c48919c3d19 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 26 Sep 2025 14:20:11 +0930 Subject: btrfs: only set the device specific options after devices are opened [BUG] With v6.17-rc kernels, btrfs will always set the 'ssd' mount option even if the block device is a rotating one: # cat /sys/block/sdd/queue/rotational 1 # cat /etc/fstab: LABEL=DATA2 /data2 btrfs rw,relatime,space_cache=v2,subvolid=5,subvol=/,nofail,nosuid,nodev 0 0 # mount [...]
/dev/sdd on /data2 type btrfs (rw,nosuid,nodev,relatime,ssd,space_cache=v2,subvolid=5,subvol=/) [CAUSE] The 'ssd' mount option is set by set_device_specific_options(), which expects fs_devices::rotating to have been set if there is any rotating device in the btrfs. However after commit bddf57a70781 ("btrfs: delay btrfs_open_devices() until super block is created"), the device opening is delayed until the super block is created. But the timing of set_device_specific_options() is still left as is, so the function is called without any device opened. Since no device is opened, fs_devices::rotating will never be set, making btrfs incorrectly set the 'ssd' mount option. [FIX] Only call set_device_specific_options() after btrfs_open_devices(). Also, only call set_device_specific_options() for a new mount; if we're mounting an already mounted btrfs, there is no need to set the device specific mount options again. Reported-by: HAN Yuwei Link: https://lore.kernel.org/linux-btrfs/C8FF75669DFFC3C5+5f93bf8a-80a0-48a6-81bf-4ec890abc99a@bupt.moe/ Fixes: bddf57a70781 ("btrfs: delay btrfs_open_devices() until super block is created") CC: stable@vger.kernel.org # 6.17 Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d6e496436539..aadc02374b2a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1900,8 +1900,6 @@ static int btrfs_get_tree_super(struct fs_context *fc) return PTR_ERR(sb); } - set_device_specific_options(fs_info); - if (sb->s_root) { /* * Not the first mount of the fs thus got an existing super block. @@ -1946,6 +1944,7 @@ static int btrfs_get_tree_super(struct fs_context *fc) deactivate_locked_super(sb); return -EACCES; } + set_device_specific_options(fs_info); bdev = fs_devices->latest_dev->bdev; snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); shrinker_debugfs_rename(sb->s_shrink, "sb-btrfs:%s", sb->s_id); -- cgit v1.2.3 From 42d3a055d946878a327ee030f0e0c7df0f0f15c8 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 30 Sep 2025 07:54:30 +0930 Subject: btrfs: do not use folio_test_partial_kmap() in ASSERT()s [BUG] Syzbot reported an ASSERT() triggered inside scrub: BTRFS info (device loop0): scrub: started on devid 1 assertion failed: !folio_test_partial_kmap(folio) :: 0, in fs/btrfs/scrub.c:697 ------------[ cut here ]------------ kernel BUG at fs/btrfs/scrub.c:697!
Oops: invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 0 UID: 0 PID: 6077 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/18/2025 RIP: 0010:scrub_stripe_get_kaddr+0x1bb/0x1c0 fs/btrfs/scrub.c:697 Call Trace: scrub_bio_add_sector fs/btrfs/scrub.c:932 [inline] scrub_submit_initial_read+0xf21/0x1120 fs/btrfs/scrub.c:1897 submit_initial_group_read+0x423/0x5b0 fs/btrfs/scrub.c:1952 flush_scrub_stripes+0x18f/0x1150 fs/btrfs/scrub.c:1973 scrub_stripe+0xbea/0x2a30 fs/btrfs/scrub.c:2516 scrub_chunk+0x2a3/0x430 fs/btrfs/scrub.c:2575 scrub_enumerate_chunks+0xa70/0x1350 fs/btrfs/scrub.c:2839 btrfs_scrub_dev+0x6e7/0x10e0 fs/btrfs/scrub.c:3153 btrfs_ioctl_scrub+0x249/0x4b0 fs/btrfs/ioctl.c:3163 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:583 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f ---[ end trace 0000000000000000 ]--- This doesn't make much sense, as none of the folios we allocated for scrub should be highmem. [CAUSE] Thankfully syzbot has a detailed kernel config file, showing that CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP is set to y. And that debug option forces all folio_test_partial_kmap() calls to return true, to improve coverage on highmem tests. But in our case we really just want to make sure the folios we allocated are not highmem (and they are indeed not). Such an incorrect result from folio_test_partial_kmap() is just screwing up everything. [FIX] Replace folio_test_partial_kmap() with folio_test_highmem() so that we won't bother those highmem specific debugging options. Fixes: 5fbaae4b8567 ("btrfs: prepare scrub to support bs > ps cases") Reported-by: syzbot+bde59221318c592e6346@syzkaller.appspotmail.com Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 4691d0bdb2e8..651b11884f82 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -694,7 +694,7 @@ static void *scrub_stripe_get_kaddr(struct scrub_stripe *stripe, int sector_nr) /* stripe->folios[] is allocated by us and no highmem is allowed. */ ASSERT(folio); - ASSERT(!folio_test_partial_kmap(folio)); + ASSERT(!folio_test_highmem(folio)); return folio_address(folio) + offset_in_folio(folio, offset); } @@ -707,7 +707,7 @@ static phys_addr_t scrub_stripe_get_paddr(struct scrub_stripe *stripe, int secto /* stripe->folios[] is allocated by us and no highmem is allowed. */ ASSERT(folio); - ASSERT(!folio_test_partial_kmap(folio)); + ASSERT(!folio_test_highmem(folio)); /* And the range must be contained inside the folio.
*/ ASSERT(offset_in_folio(folio, offset) + fs_info->sectorsize <= folio_size(folio)); return page_to_phys(folio_page(folio, 0)) + offset_in_folio(folio, offset); -- cgit v1.2.3 From a5a51bf4e9b7354ce7cd697e610d72c1b33fd949 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 1 Oct 2025 11:08:13 +0100 Subject: btrfs: do not assert we found block group item when creating free space tree Currently, when building a free space tree at populate_free_space_tree(), if we are not using the block group tree feature, we always expect to find block group items (either extent items or a block group item with key type BTRFS_BLOCK_GROUP_ITEM_KEY) when we search the extent tree with btrfs_search_slot_for_read(), so we assert that we found an item. However this expectation is wrong since we can have a new block group created in the current transaction which is still empty and for which we still have not added the block group's item to the extent tree, in which case we do not have any items in the extent tree associated to the block group. The insertion of a new block group's block group item in the extent tree happens at btrfs_create_pending_block_groups() when it calls the helper insert_block_group_item(). This typically is done when a transaction handle is released, committed or when running delayed refs (either as part of a transaction commit or when serving tickets for space reservation if we are low on free space). So remove the assertion at populate_free_space_tree() even when the block group tree feature is not enabled and update the comment to mention this case. Syzbot reported this with the following stack trace: BTRFS info (device loop3 state M): rebuilding free space tree assertion failed: ret == 0 :: 0, in fs/btrfs/free-space-tree.c:1115 ------------[ cut here ]------------ kernel BUG at fs/btrfs/free-space-tree.c:1115! Oops: invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 1 UID: 0 PID: 6352 Comm: syz.3.25 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/18/2025 RIP: 0010:populate_free_space_tree+0x700/0x710 fs/btrfs/free-space-tree.c:1115 Code: ff ff e8 d3 (...) RSP: 0018:ffffc9000430f780 EFLAGS: 00010246 RAX: 0000000000000043 RBX: ffff88805b709630 RCX: fea61d0e2e79d000 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: ffffc9000430f8b0 R08: ffffc9000430f4a7 R09: 1ffff92000861e94 R10: dffffc0000000000 R11: fffff52000861e95 R12: 0000000000000001 R13: 1ffff92000861f00 R14: dffffc0000000000 R15: 0000000000000000 FS: 00007f424d9fe6c0(0000) GS:ffff888125afc000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd78ad212c0 CR3: 0000000076d68000 CR4: 00000000003526f0 Call Trace: btrfs_rebuild_free_space_tree+0x1ba/0x6d0 fs/btrfs/free-space-tree.c:1364 btrfs_start_pre_rw_mount+0x128f/0x1bf0 fs/btrfs/disk-io.c:3062 btrfs_remount_rw fs/btrfs/super.c:1334 [inline] btrfs_reconfigure+0xaed/0x2160 fs/btrfs/super.c:1559 reconfigure_super+0x227/0x890 fs/super.c:1076 do_remount fs/namespace.c:3279 [inline] path_mount+0xd1a/0xfe0 fs/namespace.c:4027 do_mount fs/namespace.c:4048 [inline] __do_sys_mount fs/namespace.c:4236 [inline] __se_sys_mount+0x313/0x410 fs/namespace.c:4213 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f424e39066a Code: d8 64 89 02 (...) 
RSP: 002b:00007f424d9fde68 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 00007f424d9fdef0 RCX: 00007f424e39066a RDX: 0000200000000180 RSI: 0000200000000380 RDI: 0000000000000000 RBP: 0000200000000180 R08: 00007f424d9fdef0 R09: 0000000000000020 R10: 0000000000000020 R11: 0000000000000246 R12: 0000200000000380 R13: 00007f424d9fdeb0 R14: 0000000000000000 R15: 00002000000002c0 Modules linked in: ---[ end trace 0000000000000000 ]--- Reported-by: syzbot+884dc4621377ba579a6f@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/68dc3dab.a00a0220.102ee.004e.GAE@google.com/ Fixes: a5ed91828518 ("Btrfs: implement the free space B-tree") CC: # 6.1.x: 1961d20f6fa8: btrfs: fix assertion when building free space tree CC: # 6.1.x Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/free-space-tree.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index dad0b492a663..d86541073d42 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1106,14 +1106,15 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, * If ret is 1 (no key found), it means this is an empty block group, * without any extents allocated from it and there's no block group * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree - * because we are using the block group tree feature, so block group - * items are stored in the block group tree. It also means there are no - * extents allocated for block groups with a start offset beyond this - * block group's end offset (this is the last, highest, block group). + * because we are using the block group tree feature (so block group + * items are stored in the block group tree) or this is a new block + * group created in the current transaction and its block group item + * was not yet inserted in the extent tree (that happens in + * btrfs_create_pending_block_groups() -> insert_block_group_item()). + * It also means there are no extents allocated for block groups with a + * start offset beyond this block group's end offset (this is the last, + * highest, block group). */ - if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE)) - ASSERT(ret == 0); - start = block_group->start; end = block_group->start + block_group->length; while (ret == 0) { -- cgit v1.2.3 From 8ab2fa69691b2913a67f3c54fbb991247b3755be Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Tue, 30 Sep 2025 21:05:17 -0700 Subject: btrfs: fix incorrect readahead expansion length The intent of btrfs_readahead_expand() was to expand to the length of the current compressed extent being read. However, "ram_bytes" is *not* that, in the case where a single physical compressed extent is used for multiple file extents. Consider this case with a large compressed extent C and then later two non-compressed extents N1 and N2 written over C, leaving C1 and C2 pointing to offset/len pairs of C: [ C ] [ N1 ][ C1 ][ N2 ][ C2 ] In such a case, ram_bytes for both C1 and C2 is the full uncompressed length of C. So starting readahead in C1 will expand the readahead past the end of C1, past N2, and into C2. This will then expand readahead again, to C2_start + ram_bytes, way past EOF. First of all, this is totally undesirable, we don't want to read the whole file in arbitrary chunks of the large underlying extent if it happens to exist. Secondly, it results in zeroing the range past the end of C2 up to ram_bytes. 
This is particularly unpleasant with fs-verity as it can zero and set uptodate pages in the verity virtual space past EOF. This incorrect readahead behavior can lead to verity verification errors, if we iterate in a way that happens to do the wrong readahead. Fix this by using em->len for readahead expansion, not em->ram_bytes, resulting in the expected behavior of stopping readahead at the extent boundary. Reported-by: Max Chernoff Link: https://bugzilla.redhat.com/show_bug.cgi?id=2399898 Fixes: 9e9ff875e417 ("btrfs: use readahead_expand() on compressed extents") CC: stable@vger.kernel.org # 6.17 Reviewed-by: Filipe Manana Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c123a3ef154a..755ec6dfd51c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -973,7 +973,7 @@ static void btrfs_readahead_expand(struct readahead_control *ractl, { const u64 ra_pos = readahead_pos(ractl); const u64 ra_end = ra_pos + readahead_length(ractl); - const u64 em_end = em->start + em->ram_bytes; + const u64 em_end = em->start + em->len; /* No expansion for holes and inline extents. */ if (em->disk_bytenr > EXTENT_MAP_LAST_BYTE) -- cgit v1.2.3 From fec9b9d3ced39f16be8d7afdf81f4dd2653da319 Mon Sep 17 00:00:00 2001 From: Miquel Sabaté Solà Date: Wed, 8 Oct 2025 14:18:59 +0200 Subject: btrfs: fix memory leaks when rejecting a non SINGLE data profile without an RST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the end of btrfs_load_block_group_zone_info() the first thing we do is to ensure that if the mapping type is not a SINGLE one and there is no RAID stripe tree, then we return early with an error. Doing that, though, prevents the code from running the last calls from this function which are about freeing memory allocated during its run. Hence, in this case, instead of returning early, we set the ret value and fall through the rest of the cleanup code. Fixes: 5906333cc4af ("btrfs: zoned: don't skip block group profile checks on conventional zones") CC: stable@vger.kernel.org # 6.8+ Reviewed-by: Johannes Thumshirn Signed-off-by: Miquel Sabaté Solà Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index e3341a84f4ab..838149fa60ce 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1753,7 +1753,7 @@ out: !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", btrfs_bg_type_to_raid_name(map->type)); - return -EINVAL; + ret = -EINVAL; } if (unlikely(cache->alloc_offset > cache->zone_capacity)) { -- cgit v1.2.3 From e92c2941204de7b62e9c2deecfeb9eaefe54a22a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Oct 2025 18:08:58 +0300 Subject: btrfs: tree-checker: fix bounds check in check_inode_extref() The parentheses for the unlikely() annotation were put in the wrong place so it means that the condition is basically never true and the bounds checking is skipped. 
Fixes: aab9458b9f00 ("btrfs: tree-checker: add inode extref checks") Signed-off-by: Dan Carpenter Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index ca30b15ea452..c10b4c242acf 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1797,7 +1797,7 @@ static int check_inode_extref(struct extent_buffer *leaf, struct btrfs_inode_extref *extref = (struct btrfs_inode_extref *)ptr; u16 namelen; - if (unlikely(ptr + sizeof(*extref)) > end) { + if (unlikely(ptr + sizeof(*extref) > end)) { inode_ref_err(leaf, slot, "inode extref overflow, ptr %lu end %lu inode_extref size %zu", ptr, end, sizeof(*extref)); -- cgit v1.2.3 From 8aec9dbf2db2e958de5bd20e23b8fbb8f2aa1fa6 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 3 Oct 2025 15:11:06 +0100 Subject: btrfs: send: fix -Wflex-array-member-not-at-end warning in struct send_ctx The warning -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Fix the following warning: fs/btrfs/send.c:181:24: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] and move the declaration of send_ctx::cur_inode_path to the end. Notice that struct fs_path contains a flexible array member inline_buf, but also a padding array and a limit calculated for the usable space of inline_buf (FS_PATH_INLINE_SIZE). It is not the pattern where flexible array is in the middle of a structure and could potentially overwrite other members. Signed-off-by: Gustavo A. R. Silva Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/send.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9230e5066fc6..6144e66661f5 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -178,7 +178,6 @@ struct send_ctx { u64 cur_inode_rdev; u64 cur_inode_last_extent; u64 cur_inode_next_write_offset; - struct fs_path cur_inode_path; bool cur_inode_new; bool cur_inode_new_gen; bool cur_inode_deleted; @@ -305,6 +304,9 @@ struct send_ctx { struct btrfs_lru_cache dir_created_cache; struct btrfs_lru_cache dir_utimes_cache; + + /* Must be last as it ends in a flexible-array member. */ + struct fs_path cur_inode_path; }; struct pending_dir_move { -- cgit v1.2.3 From a375246fcf2bbdaeb1df7fa7ee5a8b884a89085e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 9 Oct 2025 08:40:01 -0700 Subject: cxl/features: Add check for no entries in cxl_feature_info cxl EDAC calls cxl_feature_info() to get the feature information and if the hardware has no Features support, cxlfs may be passed in as NULL. [ 51.957498] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 51.965571] #PF: supervisor read access in kernel mode [ 51.971559] #PF: error_code(0x0000) - not-present page [ 51.977542] PGD 17e4f6067 P4D 0 [ 51.981384] Oops: Oops: 0000 [#1] SMP NOPTI [ 51.986300] CPU: 49 UID: 0 PID: 3782 Comm: systemd-udevd Not tainted 6.17.0dj test+ #64 PREEMPT(voluntary) [ 51.997355] Hardware name: [ 52.009790] RIP: 0010:cxl_feature_info+0xa/0x80 [cxl_core] Add a check for cxlfs before dereferencing it and return -EOPNOTSUPP if there is no cxlfs created due to no hardware support. 
Fixes: eb5dfcb9e36d ("cxl: Add support to handle user feature commands for set feature") Reviewed-by: Davidlohr Bueso Reviewed-by: Alison Schofield Signed-off-by: Dave Jiang --- drivers/cxl/core/features.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c index 7c750599ea69..4bc484b46f43 100644 --- a/drivers/cxl/core/features.c +++ b/drivers/cxl/core/features.c @@ -371,6 +371,9 @@ cxl_feature_info(struct cxl_features_state *cxlfs, { struct cxl_feat_entry *feat; + if (!cxlfs || !cxlfs->entries) + return ERR_PTR(-EOPNOTSUPP); + for (int i = 0; i < cxlfs->entries->num_features; i++) { feat = &cxlfs->entries->ent[i]; if (uuid_equal(uuid, &feat->uuid)) -- cgit v1.2.3 From f25785f9b088ed65089dd0d0034da52858417839 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sun, 5 Oct 2025 23:48:05 -0400 Subject: x86/mm: Fix overflow in __cpa_addr() The change to have cpa_flush() call flush_kernel_range() introduced a bug where __cpa_addr() can access an entry one past the end of the cpa->pages array. KASAN reports the issue like this: BUG: KASAN: slab-out-of-bounds in __cpa_addr arch/x86/mm/pat/set_memory.c:309 [inline] BUG: KASAN: slab-out-of-bounds in __cpa_addr+0x1d3/0x220 arch/x86/mm/pat/set_memory.c:306 Read of size 8 at addr ffff88801f75e8f8 by task syz.0.17/5978 This bug could cause cpa_flush() to not properly flush memory, which somehow never showed any symptoms in my tests, possibly because cpa_flush() is called so rarely, but could potentially cause issues for other people. Fix the issue by directly calculating the flush end address from the start address. Fixes: 86e6815b316e ("x86/mm: Change cpa_flush() to call flush_kernel_range() directly") Reported-by: syzbot+afec6555eef563c66c97@syzkaller.appspotmail.com Signed-off-by: Rik van Riel Signed-off-by: Dave Hansen Reviewed-by: Kiryl Shutsemau Link: https://lore.kernel.org/all/68e2ff90.050a0220.2c17c1.0038.GAE@google.com/ --- arch/x86/mm/pat/set_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index d2d54b8c4dbb..970981893c9b 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -446,7 +446,7 @@ static void cpa_flush(struct cpa_data *cpa, int cache) } start = fix_addr(__cpa_addr(cpa, 0)); - end = fix_addr(__cpa_addr(cpa, cpa->numpages)); + end = start + cpa->numpages * PAGE_SIZE; if (cpa->force_flush_all) end = TLB_FLUSH_ALL; -- cgit v1.2.3 From 83b0177a6c4889b3a6e865da5e21b2c9d97d0551 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 16 May 2025 15:43:04 +0200 Subject: x86/mm: Fix SMP ordering in switch_mm_irqs_off() Stephen noted that it is possible to not have an smp_mb() between the loaded_mm store and the tlb_gen load in switch_mm(), meaning the ordering against flush_tlb_mm_range() goes out the window, and it becomes possible for switch_mm() to not observe a recent tlb_gen update and fail to flush the TLBs.
[ dhansen: merge conflict fixed by Ingo ] Fixes: 209954cbc7d0 ("x86/mm/tlb: Update mm_cpumask lazily") Reported-by: Stephen Dolan Closes: https://lore.kernel.org/all/CAHDw0oGd0B4=uuv8NGqbUQ_ZVmSheU2bN70e4QhFXWvuAZdt2w@mail.gmail.com/ Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Signed-off-by: Dave Hansen --- arch/x86/mm/tlb.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 39f80111e6f1..5d221709353e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -911,11 +911,31 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, * CR3 and cpu_tlbstate.loaded_mm are not all in sync. */ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); - barrier(); - /* Start receiving IPIs and then read tlb_gen (and LAM below) */ + /* + * Make sure this CPU is set in mm_cpumask() such that we'll + * receive invalidation IPIs. + * + * Rely on the smp_mb() implied by cpumask_set_cpu()'s atomic + * operation, or explicitly provide one. Such that: + * + * switch_mm_irqs_off() flush_tlb_mm_range() + * smp_store_release(loaded_mm, SWITCHING); atomic64_inc_return(tlb_gen) + * smp_mb(); // here // smp_mb() implied + * atomic64_read(tlb_gen); this_cpu_read(loaded_mm); + * + * we properly order against flush_tlb_mm_range(), where the + * loaded_mm load can happen in native_flush_tlb_multi() -> + * should_flush_tlb(). + * + * This way switch_mm() must see the new tlb_gen or + * flush_tlb_mm_range() must see the new loaded_mm, or both. + */ if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next))) cpumask_set_cpu(cpu, mm_cpumask(next)); + else + smp_mb(); + next_tlb_gen = atomic64_read(&next->context.tlb_gen); ns = choose_new_asid(next, next_tlb_gen); -- cgit v1.2.3 From d6fc45100aa8c02be3ddd16fae569b84086c15a9 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Fri, 10 Oct 2025 22:43:07 +0800 Subject: PCI: cadence: Search for MSI Capability with correct ID 907912c1daa7 ("PCI: cadence: Use cdns_pcie_find_*capability() to avoid hardcoding offsets") incorrectly searched for the MSI-X Capability ID instead of the MSI Capability ID in cdns_pcie_ep_get_msi(). Search for PCI_CAP_ID_MSI, not PCI_CAP_ID_MSIX, to fix this problem. Fixes: 907912c1daa7 ("PCI: cadence: Use cdns_pcie_find_*capability() to avoid hardcoding offsets") Reported-by: Sasha Levin Closes: https://lore.kernel.org/r/aOfMk9BW8BH2P30V@laps/ Signed-off-by: Hans Zhang <18255117159@163.com> Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20251010144307.12979-1-18255117159@163.com --- drivers/pci/controller/cadence/pcie-cadence-ep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/cadence/pcie-cadence-ep.c b/drivers/pci/controller/cadence/pcie-cadence-ep.c index 1eac012a8226..c0e1194a936b 100644 --- a/drivers/pci/controller/cadence/pcie-cadence-ep.c +++ b/drivers/pci/controller/cadence/pcie-cadence-ep.c @@ -255,7 +255,7 @@ static int cdns_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn) u16 flags, mme; u8 cap; - cap = cdns_pcie_find_capability(pcie, PCI_CAP_ID_MSIX); + cap = cdns_pcie_find_capability(pcie, PCI_CAP_ID_MSI); fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn); /* Validate that the MSI feature is actually enabled.
*/ -- cgit v1.2.3 From 1ee889fdf409ce68c1e3b62912333a5cc69acaa0 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 1 Oct 2025 01:29:57 +0200 Subject: f2fs: don't call iput() from f2fs_drop_inode() iput() calls the problematic routine, which does a ->i_count inc/dec cycle; undoing the reference with iput() therefore recurses into the same problem. Note f2fs should not be playing games with the refcount to begin with, but that will be handled later. For now, solve the immediate regression. Fixes: bc986b1d756482a ("fs: stop accessing ->i_count directly in f2fs and gfs2") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202509301450.138b448f-lkp@intel.com Signed-off-by: Mateusz Guzik Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fd8e7b0b2166..db7afb806411 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1820,7 +1820,7 @@ static int f2fs_drop_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); spin_lock(&inode->i_lock); - iput(inode); + atomic_dec(&inode->i_count); } trace_f2fs_drop_inode(inode, 0); return 0; -- cgit v1.2.3 From 9d5c4f5c7a2c7677e1b3942772122b032c265aae Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 Oct 2025 03:32:30 +0000 Subject: f2fs: fix wrong block mapping for multi-devices Assuming the disk layout as below, disk0: 0 --- 0x00035abfff disk1: 0x00035ac000 --- 0x00037abfff disk2: 0x00037ac000 --- 0x00037ebfff and we want to read data from offset=13568 having len=128 across the block devices, we can illustrate the block addresses as follows: fofs 0 maps to LBA 0x37e8aa9, fofs 13568 to LBA 0x37ebfa9, and fofs 13568+128 to LBA 0x37ec029; relative to disk2 (which starts at 0x37ac000), LBAs 0x37e8aa9 and 0x37ebfa9 correspond to map addresses 0x3caa9 and 0x3ffa9. In this example, we should give the relative map of the target block device, ranging from 0x3caa9 to 0x3ffa9, where the length should be calculated as 0x37ebfff + 1 - 0x37ebfa9. In the line below, however, map->m_pblk is expected to be the original (absolute) address, not the address already rebased to the target block device: - map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk); Cc: stable@vger.kernel.org Fixes: 71f2c8206202 ("f2fs: multidevice: support direct IO") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ef38e62cda8f..775aa4f63aa3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1497,8 +1497,8 @@ static bool f2fs_map_blocks_cached(struct inode *inode, struct f2fs_dev_info *dev = &sbi->devs[bidx]; map->m_bdev = dev->bdev; - map->m_pblk -= dev->start_blk; map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk); + map->m_pblk -= dev->start_blk; } else { map->m_bdev = inode->i_sb->s_bdev; } -- cgit v1.2.3 From fcb8b32a68fd40b0440cb9468cf6f6ab9de9f3c5 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 8 Oct 2025 16:14:45 +0200 Subject: dpll: zl3073x: Handle missing or corrupted flash configuration If the configuration in the internal flash is missing or corrupted, basic communication over the bus still functions, but the device is not capable of normal operation (for example, using mailboxes). This condition is indicated in the info register by the ready bit. If this bit is cleared, the probe procedure times out while fetching the device state.
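The guard the fix adds (described next) reduces to a single bit test; a hedged sketch of that test, using the kernel's BIT()/FIELD_GET() helpers exactly as the patch below does (the register value is illustrative):

/* bit 7 of the one-byte info register signals "firmware configured and ready" */
#define ZL_INFO_READY	BIT(7)

u8 info = 0x31;				/* example value: ready bit clear */

if (!FIELD_GET(ZL_INFO_READY, info)) {
	/* flash config missing/corrupted: keep devlink registered so the
	 * user can flash a good image, but skip DPLL/pin registration and
	 * return success rather than an error */
}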
Handle this case by checking the ready bit value in zl3073x_dev_start() and skipping DPLL device and pin registration if it is cleared. Do not report this condition as an error, allowing the devlink device to be registered and enabling the user to flash the correct configuration. Prior to this patch: [ 31.112299] zl3073x-i2c 1-0070: Failed to fetch input state: -ETIMEDOUT [ 31.116332] zl3073x-i2c 1-0070: error -ETIMEDOUT: Failed to start device [ 31.136881] zl3073x-i2c 1-0070: probe with driver zl3073x-i2c failed with error -110 After this patch: [ 41.011438] zl3073x-i2c 1-0070: FW not fully ready - missing or corrupted config Fixes: 75a71ecc24125 ("dpll: zl3073x: Register DPLL devices and pins") Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251008141445.841113-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/core.c | 21 +++++++++++++++++++++ drivers/dpll/zl3073x/regs.h | 3 +++ 2 files changed, 24 insertions(+) diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index 092e7027948a..e42e527813cf 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -1038,8 +1038,29 @@ zl3073x_dev_phase_meas_setup(struct zl3073x_dev *zldev) int zl3073x_dev_start(struct zl3073x_dev *zldev, bool full) { struct zl3073x_dpll *zldpll; + u8 info; int rc; + rc = zl3073x_read_u8(zldev, ZL_REG_INFO, &info); + if (rc) { + dev_err(zldev->dev, "Failed to read device status info\n"); + return rc; + } + + if (!FIELD_GET(ZL_INFO_READY, info)) { + /* The ready bit indicates that the firmware was successfully + * configured and is ready for normal operation. If it is + * cleared then the configuration stored in flash is wrong + * or missing. In this situation the driver will expose + * only devlink interface to give an opportunity to flash + * the correct config. + */ + dev_info(zldev->dev, + "FW not fully ready - missing or corrupted config\n"); + + return 0; + } + if (full) { /* Fetch device state */ rc = zl3073x_dev_state_fetch(zldev); diff --git a/drivers/dpll/zl3073x/regs.h b/drivers/dpll/zl3073x/regs.h index 19a25325bd9c..d837bee72b17 100644 --- a/drivers/dpll/zl3073x/regs.h +++ b/drivers/dpll/zl3073x/regs.h @@ -67,6 +67,9 @@ * Register Page 0, General ************************** +#define ZL_REG_INFO ZL_REG(0, 0x00, 1) +#define ZL_INFO_READY BIT(7) + #define ZL_REG_ID ZL_REG(0, 0x01, 2) #define ZL_REG_REVISION ZL_REG(0, 0x03, 2) #define ZL_REG_FW_VER ZL_REG(0, 0x05, 2) -- cgit v1.2.3 From 25718fdcbdd2dadd15fc8b684df59b43970b91ed Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 9 Oct 2025 11:43:38 +0200 Subject: net: gro_cells: Use nested-BH locking for gro_cell The gro_cell data structure is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Add a local_lock_t to the data structure and use local_lock_nested_bh() for locking. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT.
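For readers unfamiliar with the API: local_lock_t is the kernel's scoped per-CPU lock from <linux/local_lock.h>. On !PREEMPT_RT it compiles down to lockdep annotations, since BH disablement already provides exclusion; on PREEMPT_RT it becomes a real lock, because local_bh_disable() no longer guarantees per-CPU exclusion there. A hedged sketch of the general pattern the patch follows (kernel-style code, not the gro_cells code itself; names are illustrative):

/* per-CPU state protected by BH on !RT, by the local_lock on RT */
struct my_cell {
	struct sk_buff_head	q;
	local_lock_t		bh_lock;
};

static DEFINE_PER_CPU(struct my_cell, cells);

/* init, once per CPU, e.g. from the setup path: */
local_lock_init(&per_cpu_ptr(&cells, cpu)->bh_lock);

/* fast path, running in BH context or under local_bh_disable(): */
local_lock_nested_bh(&cells.bh_lock);	/* locks this CPU's instance */
__skb_queue_tail(&this_cpu_ptr(&cells)->q, skb);
local_unlock_nested_bh(&cells.bh_lock);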
Reported-by: syzbot+8715dd783e9b0bef43b1@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68c6c3b1.050a0220.2ff435.0382.GAE@google.com/ Fixes: 3253cb49cbad ("softirq: Allow to drop the softirq-BKL lock on PREEMPT_RT") Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20251009094338.j1jyKfjR@linutronix.de Signed-off-by: Jakub Kicinski --- net/core/gro_cells.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index ff8e5b64bf6b..b43911562f4d 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -8,11 +8,13 @@ struct gro_cell { struct sk_buff_head napi_skbs; struct napi_struct napi; + local_lock_t bh_lock; }; int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) { struct net_device *dev = skb->dev; + bool have_bh_lock = false; struct gro_cell *cell; int res; @@ -25,6 +27,8 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) goto unlock; } + local_lock_nested_bh(&gcells->cells->bh_lock); + have_bh_lock = true; cell = this_cpu_ptr(gcells->cells); if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(net_hotdata.max_backlog)) { @@ -39,6 +43,9 @@ drop: if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); + if (have_bh_lock) + local_unlock_nested_bh(&gcells->cells->bh_lock); + res = NET_RX_SUCCESS; unlock: @@ -54,6 +61,7 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) struct sk_buff *skb; int work_done = 0; + __local_lock_nested_bh(&cell->bh_lock); while (work_done < budget) { skb = __skb_dequeue(&cell->napi_skbs); if (!skb) @@ -64,6 +72,7 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) if (work_done < budget) napi_complete_done(napi, work_done); + __local_unlock_nested_bh(&cell->bh_lock); return work_done; } @@ -79,6 +88,7 @@ int gro_cells_init(struct gro_cells *gcells, struct net_device *dev) struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); __skb_queue_head_init(&cell->napi_skbs); + local_lock_init(&cell->bh_lock); set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); -- cgit v1.2.3 From 70f92ab97042f243e1c8da1c457ff56b9b3e49f1 Mon Sep 17 00:00:00 2001 From: Linmao Li Date: Thu, 9 Oct 2025 20:25:49 +0800 Subject: r8169: fix packet truncation after S4 resume on RTL8168H/RTL8111H After resume from S4 (hibernate), RTL8168H/RTL8111H truncates incoming packets. Packet captures show messages like "IP truncated-ip - 146 bytes missing!". The issue is caused by RxConfig not being properly re-initialized after resume. Re-initializing the RxConfig register before the chip re-initialization sequence avoids the truncation and restores correct packet reception. This follows the same pattern as commit ef9da46ddef0 ("r8169: fix data corruption issue on RTL8402"). 
Fixes: 6e1d0b898818 ("r8169:add support for RTL8168H and RTL8107E") Signed-off-by: Linmao Li Reviewed-by: Jacob Keller Reviewed-by: Heiner Kallweit Link: https://patch.msgid.link/20251009122549.3955845-1-lilinmao@kylinos.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 8903ae90afcb..d18734fe12e4 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4994,8 +4994,9 @@ static int rtl8169_resume(struct device *device) if (!device_may_wakeup(tp_to_dev(tp))) clk_prepare_enable(tp->clk); - /* Reportedly at least Asus X453MA truncates packets otherwise */ - if (tp->mac_version == RTL_GIGA_MAC_VER_37) + /* Some chip versions may truncate packets without this initialization */ + if (tp->mac_version == RTL_GIGA_MAC_VER_37 || + tp->mac_version == RTL_GIGA_MAC_VER_46) rtl_init_rxcfg(tp); return rtl8169_runtime_resume(device); -- cgit v1.2.3 From e4d0c909bf8328d986bf3aadba0c33a72b5ae30d Mon Sep 17 00:00:00 2001 From: Kamil Horák - 2N Date: Thu, 9 Oct 2025 15:06:56 +0200 Subject: net: phy: bcm54811: Fix GMII/MII/MII-Lite selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Broadcom bcm54811 is hardware-strapped to select between RGMII and GMII/MII/MII-Lite modes. However, the corresponding bit, RGMII Enable in the Miscellaneous Control register, must also be set to select the desired RGMII or MII(-Lite)/GMII mode. Fixes: 3117a11fff5af9e7 ("net: phy: bcm54811: PHY initialization") Signed-off-by: Kamil Horák - 2N Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251009130656.1308237-2-kamilh@axis.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/broadcom.c | 20 +++++++++++++++++++- include/linux/brcmphy.h | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 3459a0e9d8b9..cb306f9e80cc 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -405,7 +405,7 @@ static int bcm5481x_set_brrmode(struct phy_device *phydev, bool on) static int bcm54811_config_init(struct phy_device *phydev) { struct bcm54xx_phy_priv *priv = phydev->priv; - int err, reg, exp_sync_ethernet; + int err, reg, exp_sync_ethernet, aux_rgmii_en; /* Enable CLK125 MUX on LED4 if ref clock is enabled.
*/ if (!(phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED)) { @@ -434,6 +434,24 @@ static int bcm54811_config_init(struct phy_device *phydev) if (err < 0) return err; + /* Enable RGMII if configured */ + if (phy_interface_is_rgmii(phydev)) + aux_rgmii_en = MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_EN | + MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; + else + aux_rgmii_en = 0; + + /* Also writing Reserved bits 6:5 because the documentation requires + * them to be written to 0b11 + */ + err = bcm54xx_auxctl_write(phydev, + MII_BCM54XX_AUXCTL_SHDWSEL_MISC, + MII_BCM54XX_AUXCTL_MISC_WREN | + aux_rgmii_en | + MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RSVD); + if (err < 0) + return err; + return bcm5481x_set_brrmode(phydev, priv->brr_mode); } diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 15c35655f482..115a964f3006 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -137,6 +137,7 @@ #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x07 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN 0x0010 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RSVD 0x0060 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_EN 0x0080 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN 0x0100 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 -- cgit v1.2.3 From 21f4d45eba0b2dcae5dbc9e5e0ad08735c993f16 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 9 Oct 2025 16:02:19 +0100 Subject: net/ip6_tunnel: Prevent perpetual tunnel growth Like the ipv4 tunnel, the ipv6 version updates dev->needed_headroom, too. While the growth of the ipv4 tunnel headroom adjustment was limited in commit 5ae1e9922bbd ("net: ip_tunnel: prevent perpetual headroom growth"), the ipv6 tunnel still increases the headroom without any ceiling. Apply the ipv4 tunnel's headroom adjustment limit to the ipv6 version. Credits to Francesco Ruggeri, who was originally debugging this issue and wrote a local Arista-specific patch and a reproducer. Fixes: 8eb30be0352d ("ipv6: Create ip6_tnl_xmit") Cc: Florian Westphal Cc: Francesco Ruggeri Signed-off-by: Dmitry Safonov Link: https://patch.msgid.link/20251009-ip6_tunnel-headroom-v2-1-8e4dbd8f7e35@arista.com Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 15 +++++++++++++++ net/ipv4/ip_tunnel.c | 14 -------------- net/ipv6/ip6_tunnel.c | 3 +-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 4314a97702ea..ecae35512b9b 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -611,6 +611,21 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, int headroom, bool reply); +static inline void ip_tunnel_adj_headroom(struct net_device *dev, + unsigned int headroom) +{ + /* we must cap headroom to some upperlimit, else pskb_expand_head * will overflow header offsets in skb_headers_offset_update().
+ */ + const unsigned int max_allowed = 512; + + if (headroom > max_allowed) + headroom = max_allowed; + + if (headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, headroom); +} + int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); static inline int iptunnel_pull_offloads(struct sk_buff *skb) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index aaeb5d16f0c9..158a30ae7c5f 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -568,20 +568,6 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, return 0; } -static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom) -{ - /* we must cap headroom to some upperlimit, else pskb_expand_head - * will overflow header offsets in skb_headers_offset_update(). - */ - static const unsigned int max_allowed = 512; - - if (headroom > max_allowed) - headroom = max_allowed; - - if (headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, headroom); -} - void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto, int tunnel_hlen) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3262e81223df..6405072050e0 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1257,8 +1257,7 @@ route_lookup: */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr) + dst->header_len + t->hlen; - if (max_headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, max_headroom); + ip_tunnel_adj_headroom(dev, max_headroom); err = ip6_tnl_encap(skb, t, &proto, fl6); if (err) -- cgit v1.2.3 From a3f8c0a273120fd2638f03403e786c3de2382e72 Mon Sep 17 00:00:00 2001 From: Milena Olech Date: Thu, 9 Oct 2025 17:03:46 -0700 Subject: idpf: cleanup remaining SKBs in PTP flows When the driver requests a Tx timestamp value, one of the first steps is to clone the SKB using skb_get(). This increases the reference counter for that SKB to prevent unexpected freeing by another component. However, there may be a case where the index is requested and the SKB is assigned but never consumed by the PTP flows - for example due to a reset while PTP applications are running. Add a check in the timestamp release function to verify whether the SKB assigned to a Tx timestamp latch was freed, and release any remaining SKBs.
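Taken together, the two hunks below implement a simple reference-counting lifecycle; a hedged summary of the pattern (skb_get(), consume_skb() and skb_tstamp_tx() are the real APIs used by the patch, the surrounding "latch" structure is schematic):

/* latch an skb for a pending Tx timestamp: take a reference so the
 * stack cannot free it underneath the PTP code */
latch->skb = skb_get(skb);			/* refcount++ */

/* normal completion path (idpf_ptp_get_tstamp_value below): */
skb_tstamp_tx(latch->skb, &shhwtstamps);	/* deliver the timestamp */
consume_skb(latch->skb);			/* refcount-- */
latch->skb = NULL;				/* mark the latch as consumed */

/* teardown path (idpf_ptp_release_vport_tstamp below): any latch whose
 * skb pointer is still set was never consumed, so drop it here */
if (latch->skb)
	consume_skb(latch->skb);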
Fixes: 4901e83a94ef ("idpf: add Tx timestamp capabilities negotiation") Signed-off-by: Milena Olech Signed-off-by: Anton Nadezhdin Reviewed-by: Aleksandr Loktionov Tested-by: Samuel Salin Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-1-ef32a425b92a@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_ptp.c | 3 +++ drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_ptp.c b/drivers/net/ethernet/intel/idpf/idpf_ptp.c index 142823af1f9e..3e1052d070cf 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ptp.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ptp.c @@ -863,6 +863,9 @@ static void idpf_ptp_release_vport_tstamp(struct idpf_vport *vport) u64_stats_inc(&vport->tstamp_stats.flushed); list_del(&ptp_tx_tstamp->list_member); + if (ptp_tx_tstamp->skb) + consume_skb(ptp_tx_tstamp->skb); + kfree(ptp_tx_tstamp); } u64_stats_update_end(&vport->tstamp_stats.stats_sync); diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c index 8a2e0f8c5e36..61cedb6f2854 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c @@ -517,6 +517,7 @@ idpf_ptp_get_tstamp_value(struct idpf_vport *vport, shhwtstamps.hwtstamp = ns_to_ktime(tstamp); skb_tstamp_tx(ptp_tx_tstamp->skb, &shhwtstamps); consume_skb(ptp_tx_tstamp->skb); + ptp_tx_tstamp->skb = NULL; list_add(&ptp_tx_tstamp->list_member, &tx_tstamp_caps->latches_free); -- cgit v1.2.3 From 53f0eb62b4d23d40686f2dd51776b8220f2887bb Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Thu, 9 Oct 2025 17:03:47 -0700 Subject: ixgbevf: fix getting link speed data for E610 devices E610 adapters no longer use the VFLINKS register to read the PF's link speed and link-up state. As a result, the VF driver cannot get the actual link state and incorrectly reports 10G, which is the default. This leads to a situation where even 1G adapters print 10G as the actual link speed. The same happens when the PF driver sets a speed other than 10G. Add a new mailbox operation to let the VF driver request the actual link data from the PF driver. Update the mailbox API to v1.6. Incorporate both ways of getting link status within the legacy ixgbe_check_mac_link_vf() function.
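The wire format of the new op is just three 32-bit mailbox words; a hedged sketch of the exchange as implemented by this commit and the PF-side commit that follows (constants are the ones the patches define):

/* VF side: build the request */
u32 msgbuf[3] = {};
msgbuf[0] = IXGBE_VF_GET_PF_LINK_STATE;	/* opcode 0x11, API >= 1.6 */

/* PF side: fill in the reply from cached state (see the PF patch below) */
msgbuf[1] = adapter->link_speed;	/* an IXGBE_LINK_SPEED_* value */
msgbuf[2] = adapter->link_up;		/* boolean link-up state */

/* VF side: on success, consume the reply */
*speed = msgbuf[1];
*link_up = msgbuf[2];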
Fixes: 4c44b450c69b ("ixgbevf: Add support for Intel(R) E610 device") Co-developed-by: Andrzej Wilczynski Signed-off-by: Andrzej Wilczynski Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Cc: stable@vger.kernel.org Signed-off-by: Jedrzej Jagielski Tested-by: Rafal Romanowski Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-2-ef32a425b92a@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbevf/defines.h | 1 + drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 6 +- drivers/net/ethernet/intel/ixgbevf/mbx.h | 4 + drivers/net/ethernet/intel/ixgbevf/vf.c | 137 +++++++++++++++++----- 4 files changed, 116 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h index a9bc96f6399d..e177d1d58696 100644 --- a/drivers/net/ethernet/intel/ixgbevf/defines.h +++ b/drivers/net/ethernet/intel/ixgbevf/defines.h @@ -28,6 +28,7 @@ /* Link speed */ typedef u32 ixgbe_link_speed; +#define IXGBE_LINK_SPEED_UNKNOWN 0 #define IXGBE_LINK_SPEED_1GB_FULL 0x0020 #define IXGBE_LINK_SPEED_10GB_FULL 0x0080 #define IXGBE_LINK_SPEED_100_FULL 0x0008 diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 28e25641b167..92671638b428 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2275,6 +2275,7 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; static const int api[] = { + ixgbe_mbox_api_16, ixgbe_mbox_api_15, ixgbe_mbox_api_14, ixgbe_mbox_api_13, @@ -2294,7 +2295,8 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) idx++; } - if (hw->api_version >= ixgbe_mbox_api_15) { + /* Following is not supported by API 1.6, it is specific for 1.5 */ + if (hw->api_version == ixgbe_mbox_api_15) { hw->mbx.ops.init_params(hw); memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops, sizeof(struct ixgbe_mbx_operations)); @@ -2651,6 +2653,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: if (adapter->xdp_prog && hw->mac.max_tx_queues == rss) rss = rss > 3 ? 
2 : 1; @@ -4645,6 +4648,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN); break; diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h index 835bbcc5cc8e..c1494fd1f67b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.h +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h @@ -66,6 +66,7 @@ enum ixgbe_pfvf_api_rev { ixgbe_mbox_api_13, /* API version 1.3, linux/freebsd VF driver */ ixgbe_mbox_api_14, /* API version 1.4, linux/freebsd VF driver */ ixgbe_mbox_api_15, /* API version 1.5, linux/freebsd VF driver */ + ixgbe_mbox_api_16, /* API version 1.6, linux/freebsd VF driver */ /* This value should always be last */ ixgbe_mbox_api_unknown, /* indicates that API version is not known */ }; @@ -102,6 +103,9 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */ +/* mailbox API, version 1.6 VF requests */ +#define IXGBE_VF_GET_PF_LINK_STATE 0x11 /* request PF to send link info */ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index dcaef34b88b6..f05246fb5a74 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -313,6 +313,7 @@ int ixgbevf_get_reta_locked(struct ixgbe_hw *hw, u32 *reta, int num_rx_queues) * is not supported for this device type. */ switch (hw->api_version) { + case ixgbe_mbox_api_16: case ixgbe_mbox_api_15: case ixgbe_mbox_api_14: case ixgbe_mbox_api_13: @@ -382,6 +383,7 @@ int ixgbevf_get_rss_key_locked(struct ixgbe_hw *hw, u8 *rss_key) * or if the operation is not supported for this device type. */ switch (hw->api_version) { + case ixgbe_mbox_api_16: case ixgbe_mbox_api_15: case ixgbe_mbox_api_14: case ixgbe_mbox_api_13: @@ -552,6 +554,7 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: break; default: return -EOPNOTSUPP; @@ -624,6 +627,48 @@ static s32 ixgbevf_hv_get_link_state_vf(struct ixgbe_hw *hw, bool *link_state) return -EOPNOTSUPP; } +/** + * ixgbevf_get_pf_link_state - Get PF's link status + * @hw: pointer to the HW structure + * @speed: link speed + * @link_up: indicate if link is up/down + * + * Ask PF to provide link_up state and speed of the link. + * + * Return: IXGBE_ERR_MBX in the case of mailbox error, + * -EOPNOTSUPP if the op is not supported or 0 on success. + */ +static int ixgbevf_get_pf_link_state(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up) +{ + u32 msgbuf[3] = {}; + int err; + + switch (hw->api_version) { + case ixgbe_mbox_api_16: + break; + default: + return -EOPNOTSUPP; + } + + msgbuf[0] = IXGBE_VF_GET_PF_LINK_STATE; + + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) { + err = IXGBE_ERR_MBX; + *speed = IXGBE_LINK_SPEED_UNKNOWN; + /* No need to set @link_up to false as it will be done by + * ixgbe_check_mac_link_vf(). 
+ */ + } else { + *speed = msgbuf[1]; + *link_up = msgbuf[2]; + } + + return err; +} + /** * ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address * @hw: pointer to the HW structure @@ -658,6 +703,58 @@ mbx_err: return err; } +/** + * ixgbe_read_vflinks - Read VFLINKS register + * @hw: pointer to the HW structure + * @speed: link speed + * @link_up: indicate if link is up/down + * + * Get linkup status and link speed from the VFLINKS register. + */ +static void ixgbe_read_vflinks(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up) +{ + u32 vflinks = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + + /* if link status is down no point in checking to see if PF is up */ + if (!(vflinks & IXGBE_LINKS_UP)) { + *link_up = false; + return; + } + + /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs + * before the link status is correct + */ + if (hw->mac.type == ixgbe_mac_82599_vf) { + for (int i = 0; i < 5; i++) { + udelay(100); + vflinks = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + + if (!(vflinks & IXGBE_LINKS_UP)) { + *link_up = false; + return; + } + } + } + + /* We reached this point so there's link */ + *link_up = true; + + switch (vflinks & IXGBE_LINKS_SPEED_82599) { + case IXGBE_LINKS_SPEED_10G_82599: + *speed = IXGBE_LINK_SPEED_10GB_FULL; + break; + case IXGBE_LINKS_SPEED_1G_82599: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + break; + case IXGBE_LINKS_SPEED_100_82599: + *speed = IXGBE_LINK_SPEED_100_FULL; + break; + default: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + } +} + /** * ixgbevf_hv_set_vfta_vf - * Hyper-V variant - just a stub. * @hw: unused @@ -705,7 +802,6 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, struct ixgbe_mbx_info *mbx = &hw->mbx; struct ixgbe_mac_info *mac = &hw->mac; s32 ret_val = 0; - u32 links_reg; u32 in_msg = 0; /* If we were hit with a reset drop the link */ @@ -715,36 +811,14 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, if (!mac->get_link_status) goto out; - /* if link status is down no point in checking to see if pf is up */ - links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); - if (!(links_reg & IXGBE_LINKS_UP)) - goto out; - - /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs - * before the link status is correct - */ - if (mac->type == ixgbe_mac_82599_vf) { - int i; - - for (i = 0; i < 5; i++) { - udelay(100); - links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); - - if (!(links_reg & IXGBE_LINKS_UP)) - goto out; - } - } - - switch (links_reg & IXGBE_LINKS_SPEED_82599) { - case IXGBE_LINKS_SPEED_10G_82599: - *speed = IXGBE_LINK_SPEED_10GB_FULL; - break; - case IXGBE_LINKS_SPEED_1G_82599: - *speed = IXGBE_LINK_SPEED_1GB_FULL; - break; - case IXGBE_LINKS_SPEED_100_82599: - *speed = IXGBE_LINK_SPEED_100_FULL; - break; + if (hw->mac.type == ixgbe_mac_e610_vf) { + ret_val = ixgbevf_get_pf_link_state(hw, speed, link_up); + if (ret_val) + goto out; + } else { + ixgbe_read_vflinks(hw, speed, link_up); + if (*link_up == false) + goto out; } /* if the read failed it could just be a mailbox collision, best wait @@ -951,6 +1025,7 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: break; default: return 0; -- cgit v1.2.3 From f7f97cbc03a470ce405d48dedb7f135713caa0fa Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Thu, 9 Oct 2025 17:03:48 -0700 Subject: ixgbe: handle IXGBE_VF_GET_PF_LINK_STATE mailbox operation Update supported API version and provide handler for IXGBE_VF_GET_PF_LINK_STATE cmd. 
Simply put stored values of link speed and link_up from adapter context. Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Signed-off-by: Jedrzej Jagielski Link: https://lore.kernel.org/stable/20250828095227.1857066-3-jedrzej.jagielski%40intel.com Tested-by: Rafal Romanowski Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-3-ef32a425b92a@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h | 5 +++ drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 42 ++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h index 4af149b63a39..f7256a339c99 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h @@ -50,6 +50,8 @@ enum ixgbe_pfvf_api_rev { ixgbe_mbox_api_12, /* API version 1.2, linux/freebsd VF driver */ ixgbe_mbox_api_13, /* API version 1.3, linux/freebsd VF driver */ ixgbe_mbox_api_14, /* API version 1.4, linux/freebsd VF driver */ + ixgbe_mbox_api_15, /* API version 1.5, linux/freebsd VF driver */ + ixgbe_mbox_api_16, /* API version 1.6, linux/freebsd VF driver */ /* This value should always be last */ ixgbe_mbox_api_unknown, /* indicates that API version is not known */ }; @@ -86,6 +88,9 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */ +/* mailbox API, version 1.6 VF requests */ +#define IXGBE_VF_GET_PF_LINK_STATE 0x11 /* request PF to send link info */ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 32ac1e020d91..b09271d61a4e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -510,6 +510,7 @@ static int ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 max_frame, u32 vf case ixgbe_mbox_api_12: case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: + case ixgbe_mbox_api_16: /* Version 1.1 supports jumbo frames on VFs if PF has * jumbo frames enabled which means legacy VFs are * disabled @@ -1046,6 +1047,7 @@ static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_12: case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: + case ixgbe_mbox_api_16: adapter->vfinfo[vf].vf_api = api; return 0; default: @@ -1072,6 +1074,7 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_12: case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: + case ixgbe_mbox_api_16: break; default: return -1; @@ -1112,6 +1115,7 @@ static int ixgbe_get_vf_reta(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) /* verify the PF is supporting the correct API */ switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_16: case ixgbe_mbox_api_14: case ixgbe_mbox_api_13: case ixgbe_mbox_api_12: @@ -1145,6 +1149,7 @@ static int ixgbe_get_vf_rss_key(struct ixgbe_adapter *adapter, /* verify the PF is supporting the correct API */ switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_16: case ixgbe_mbox_api_14: case ixgbe_mbox_api_13: case ixgbe_mbox_api_12: @@ -1174,6 +1179,7 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter, fallthrough; case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: + case ixgbe_mbox_api_16: break; default: return -EOPNOTSUPP; @@ -1244,6 +1250,7 @@ 
static int ixgbe_get_vf_link_state(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_12: case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: + case ixgbe_mbox_api_16: break; default: return -EOPNOTSUPP; @@ -1254,6 +1261,38 @@ static int ixgbe_get_vf_link_state(struct ixgbe_adapter *adapter, return 0; } +/** + * ixgbe_send_vf_link_status - send link status data to VF + * @adapter: pointer to adapter struct + * @msgbuf: pointer to message buffers + * @vf: VF identifier + * + * Reply for IXGBE_VF_GET_PF_LINK_STATE mbox command sending link status data. + * + * Return: 0 on success or -EOPNOTSUPP when operation is not supported. + */ +static int ixgbe_send_vf_link_status(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + struct ixgbe_hw *hw = &adapter->hw; + + switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_16: + if (hw->mac.type != ixgbe_mac_e610) + return -EOPNOTSUPP; + break; + default: + return -EOPNOTSUPP; + } + /* Simply provide stored values as watchdog & link status events take + * care of its freshness. + */ + msgbuf[1] = adapter->link_speed; + msgbuf[2] = adapter->link_up; + + return 0; +} + static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) { u32 mbx_size = IXGBE_VFMAILBOX_SIZE; @@ -1328,6 +1367,9 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) case IXGBE_VF_IPSEC_DEL: retval = ixgbe_ipsec_vf_del_sa(adapter, msgbuf, vf); break; + case IXGBE_VF_GET_PF_LINK_STATE: + retval = ixgbe_send_vf_link_status(adapter, msgbuf, vf); + break; default: e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]); retval = -EIO; -- cgit v1.2.3 From a7075f501bd33c93570af759b6f4302ef0175168 Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Thu, 9 Oct 2025 17:03:49 -0700 Subject: ixgbevf: fix mailbox API compatibility by negotiating supported features Historically there was backward compatibility in terms of the mailbox API: various drivers from various OSes supporting 10G adapters from the Intel portfolio could easily negotiate the mailbox API. This convention has been broken since the introduction of API 1.4. Commit 0062e7cc955e ("ixgbevf: add VF IPsec offload code") added support for IPsec, which is specific only to the kernel ixgbe driver. None of the other Intel 10G PF/VF drivers supports it, and the IPsec implementation did not account for that lack of support - there were no such code paths. No way to negotiate support for the feature was introduced along with the feature itself. Commit 339f28964147 ("ixgbevf: Add support for new mailbox communication between PF and VF"), which increased the API version to 1.5, did the same - it introduced code supported specifically by the ESX PF driver. At the same time it changed the API version for the VF driver without touching the version defined for the PF ixgbe driver. This led to additional discrepancies, as the code provided within API 1.6 cannot be supported by the Linux ixgbe driver and causes crashes. The issue was noticed some time ago and mitigated by Jake within commit d0725312adf5 ("ixgbevf: stop attempting IPSEC offload on Mailbox API 1.5"). As a result we have a regression in IPsec support, and after the API was increased to version 1.6 the ixgbevf driver stopped supporting the ESX mailbox. To fix this mess, add a new mailbox op asking the PF driver which features it supports. Based on the response, determine whether to enable support for IPsec and the ESX-specific enhanced mailbox.
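The negotiation itself is a plain bitmask intersection; a hedged sketch of the semantics using the constants the patches define (the mailbox plumbing around it is omitted):

/* VF side: advertise everything it can use (msgbuf[1] of the request) */
u32 vf_wants = IXGBEVF_PF_SUP_IPSEC | IXGBEVF_PF_SUP_ESX_MBX;

/* PF side: mask with what it actually implements; the Linux ixgbe PF
 * defines IXGBE_SUPPORTED_FEATURES as IXGBEVF_PF_SUP_IPSEC only */
u32 reply = vf_wants & IXGBE_SUPPORTED_FEATURES;

/* VF side: only mutually supported bits survive, so against a Linux PF
 * IPsec offload is enabled and the ESX-specific mailbox stays off */
adapter->pf_features = reply;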
For compatibility purposes, the new mailbox op must be added within a new API revision, as the API version of the OOT PF & VF drivers has already been increased to 1.6 and doesn't incorporate the features negotiation op. The feature negotiation mechanism can be extended with new features when needed in the future. Reported-by: Jacob Keller Closes: https://lore.kernel.org/intel-wired-lan/20241101-jk-ixgbevf-mailbox-v1-5-fixes-v1-0-f556dc9a66ed@intel.com/ Fixes: 0062e7cc955e ("ixgbevf: add VF IPsec offload code") Fixes: 339f28964147 ("ixgbevf: Add support for new mailbox communication between PF and VF") Reviewed-by: Jacob Keller Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Cc: stable@vger.kernel.org Signed-off-by: Jedrzej Jagielski Tested-by: Rafal Romanowski Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-4-ef32a425b92a@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbevf/ipsec.c | 10 +++++ drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 7 ++++ drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 32 +++++++++++++++- drivers/net/ethernet/intel/ixgbevf/mbx.h | 4 ++ drivers/net/ethernet/intel/ixgbevf/vf.c | 45 ++++++++++++++++++++++- drivers/net/ethernet/intel/ixgbevf/vf.h | 1 + 6 files changed, 96 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c index 65580b9cb06f..fce35924ff8b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c +++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c @@ -273,6 +273,9 @@ static int ixgbevf_ipsec_add_sa(struct net_device *dev, adapter = netdev_priv(dev); ipsec = adapter->ipsec; + if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC)) + return -EOPNOTSUPP; + if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) { NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol for IPsec offload"); return -EINVAL; @@ -405,6 +408,9 @@ static void ixgbevf_ipsec_del_sa(struct net_device *dev, adapter = netdev_priv(dev); ipsec = adapter->ipsec; + if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC)) + return; + if (xs->xso.dir == XFRM_DEV_OFFLOAD_IN) { sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX; @@ -612,6 +618,10 @@ void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter) size_t size; switch (adapter->hw.api_version) { + case ixgbe_mbox_api_17: + if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC)) + return; + break; case ixgbe_mbox_api_14: break; default: diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 3a379e6a3a2a..039187607e98 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -363,6 +363,13 @@ struct ixgbevf_adapter { struct ixgbe_hw hw; u16 msg_enable; + u32 pf_features; +#define IXGBEVF_PF_SUP_IPSEC BIT(0) +#define IXGBEVF_PF_SUP_ESX_MBX BIT(1) + +#define IXGBEVF_SUPPORTED_FEATURES (IXGBEVF_PF_SUP_IPSEC | \ + IXGBEVF_PF_SUP_ESX_MBX) + struct ixgbevf_hw_stats stats; unsigned long state; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 92671638b428..d5ce20f47def 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2271,10 +2271,35 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter) adapter->stats.base_vfmprc = adapter->stats.last_vfmprc; } +/** + * ixgbevf_set_features - Set features supported by PF + *
@adapter: pointer to the adapter struct + * + * Negotiate with PF supported features and then set pf_features accordingly. + */ +static void ixgbevf_set_features(struct ixgbevf_adapter *adapter) +{ + u32 *pf_features = &adapter->pf_features; + struct ixgbe_hw *hw = &adapter->hw; + int err; + + err = hw->mac.ops.negotiate_features(hw, pf_features); + if (err && err != -EOPNOTSUPP) + netdev_dbg(adapter->netdev, + "PF feature negotiation failed.\n"); + + /* Address also pre API 1.7 cases */ + if (hw->api_version == ixgbe_mbox_api_14) + *pf_features |= IXGBEVF_PF_SUP_IPSEC; + else if (hw->api_version == ixgbe_mbox_api_15) + *pf_features |= IXGBEVF_PF_SUP_ESX_MBX; +} + static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; static const int api[] = { + ixgbe_mbox_api_17, ixgbe_mbox_api_16, ixgbe_mbox_api_15, ixgbe_mbox_api_14, @@ -2295,8 +2320,9 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) idx++; } - /* Following is not supported by API 1.6, it is specific for 1.5 */ - if (hw->api_version == ixgbe_mbox_api_15) { + ixgbevf_set_features(adapter); + + if (adapter->pf_features & IXGBEVF_PF_SUP_ESX_MBX) { hw->mbx.ops.init_params(hw); memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops, sizeof(struct ixgbe_mbx_operations)); @@ -2654,6 +2680,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: if (adapter->xdp_prog && hw->mac.max_tx_queues == rss) rss = rss > 3 ? 2 : 1; @@ -4649,6 +4676,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN); break; diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h index c1494fd1f67b..a8ed23ee66aa 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.h +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h @@ -67,6 +67,7 @@ enum ixgbe_pfvf_api_rev { ixgbe_mbox_api_14, /* API version 1.4, linux/freebsd VF driver */ ixgbe_mbox_api_15, /* API version 1.5, linux/freebsd VF driver */ ixgbe_mbox_api_16, /* API version 1.6, linux/freebsd VF driver */ + ixgbe_mbox_api_17, /* API version 1.7, linux/freebsd VF driver */ /* This value should always be last */ ixgbe_mbox_api_unknown, /* indicates that API version is not known */ }; @@ -106,6 +107,9 @@ enum ixgbe_pfvf_api_rev { /* mailbox API, version 1.6 VF requests */ #define IXGBE_VF_GET_PF_LINK_STATE 0x11 /* request PF to send link info */ +/* mailbox API, version 1.7 VF requests */ +#define IXGBE_VF_FEATURES_NEGOTIATE 0x12 /* get features supported by PF*/ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index f05246fb5a74..74d320879513 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -313,6 +313,7 @@ int ixgbevf_get_reta_locked(struct ixgbe_hw *hw, u32 *reta, int num_rx_queues) * is not supported for this device type. 
*/ switch (hw->api_version) { + case ixgbe_mbox_api_17: case ixgbe_mbox_api_16: case ixgbe_mbox_api_15: case ixgbe_mbox_api_14: @@ -383,6 +384,7 @@ int ixgbevf_get_rss_key_locked(struct ixgbe_hw *hw, u8 *rss_key) * or if the operation is not supported for this device type. */ switch (hw->api_version) { + case ixgbe_mbox_api_17: case ixgbe_mbox_api_16: case ixgbe_mbox_api_15: case ixgbe_mbox_api_14: @@ -555,6 +557,7 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: break; default: return -EOPNOTSUPP; @@ -646,6 +649,7 @@ static int ixgbevf_get_pf_link_state(struct ixgbe_hw *hw, ixgbe_link_speed *spee switch (hw->api_version) { case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: break; default: return -EOPNOTSUPP; @@ -669,6 +673,42 @@ static int ixgbevf_get_pf_link_state(struct ixgbe_hw *hw, ixgbe_link_speed *spee return err; } +/** + * ixgbevf_negotiate_features_vf - negotiate supported features with PF driver + * @hw: pointer to the HW structure + * @pf_features: bitmask of features supported by PF + * + * Return: IXGBE_ERR_MBX in the case of mailbox error, + * -EOPNOTSUPP if the op is not supported or 0 on success. + */ +static int ixgbevf_negotiate_features_vf(struct ixgbe_hw *hw, u32 *pf_features) +{ + u32 msgbuf[2] = {}; + int err; + + switch (hw->api_version) { + case ixgbe_mbox_api_17: + break; + default: + return -EOPNOTSUPP; + } + + msgbuf[0] = IXGBE_VF_FEATURES_NEGOTIATE; + msgbuf[1] = IXGBEVF_SUPPORTED_FEATURES; + + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + + if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) { + err = IXGBE_ERR_MBX; + *pf_features = 0x0; + } else { + *pf_features = msgbuf[1]; + } + + return err; +} + /** * ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address * @hw: pointer to the HW structure @@ -799,6 +839,7 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, bool *link_up, bool autoneg_wait_to_complete) { + struct ixgbevf_adapter *adapter = hw->back; struct ixgbe_mbx_info *mbx = &hw->mbx; struct ixgbe_mac_info *mac = &hw->mac; s32 ret_val = 0; @@ -825,7 +866,7 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, * until we are called again and don't report an error */ if (mbx->ops.read(hw, &in_msg, 1)) { - if (hw->api_version >= ixgbe_mbox_api_15) + if (adapter->pf_features & IXGBEVF_PF_SUP_ESX_MBX) mac->get_link_status = false; goto out; } @@ -1026,6 +1067,7 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: break; default: return 0; @@ -1080,6 +1122,7 @@ static const struct ixgbe_mac_operations ixgbevf_mac_ops = { .setup_link = ixgbevf_setup_mac_link_vf, .check_link = ixgbevf_check_mac_link_vf, .negotiate_api_version = ixgbevf_negotiate_api_version_vf, + .negotiate_features = ixgbevf_negotiate_features_vf, .set_rar = ixgbevf_set_rar_vf, .update_mc_addr_list = ixgbevf_update_mc_addr_list_vf, .update_xcast_mode = ixgbevf_update_xcast_mode, diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index 2d791bc26ae4..4f19b8900c29 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -26,6 +26,7 @@ struct ixgbe_mac_operations { s32 (*stop_adapter)(struct ixgbe_hw *); s32 (*get_bus_info)(struct ixgbe_hw *); s32 (*negotiate_api_version)(struct ixgbe_hw *hw, int api); + int 
(*negotiate_features)(struct ixgbe_hw *hw, u32 *pf_features); /* Link */ s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool); -- cgit v1.2.3 From 823be089f9c8ab136ba382b516aedd3f7ac854bd Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Thu, 9 Oct 2025 17:03:50 -0700 Subject: ixgbe: handle IXGBE_VF_FEATURES_NEGOTIATE mbox cmd Send to VF information about features supported by the PF driver. Increase API version to 1.7. Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Signed-off-by: Jedrzej Jagielski Tested-by: Rafal Romanowski Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-5-ef32a425b92a@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h | 10 +++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 37 ++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h index f7256a339c99..0334ed4b8fa3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h @@ -52,6 +52,7 @@ enum ixgbe_pfvf_api_rev { ixgbe_mbox_api_14, /* API version 1.4, linux/freebsd VF driver */ ixgbe_mbox_api_15, /* API version 1.5, linux/freebsd VF driver */ ixgbe_mbox_api_16, /* API version 1.6, linux/freebsd VF driver */ + ixgbe_mbox_api_17, /* API version 1.7, linux/freebsd VF driver */ /* This value should always be last */ ixgbe_mbox_api_unknown, /* indicates that API version is not known */ }; @@ -91,6 +92,9 @@ enum ixgbe_pfvf_api_rev { /* mailbox API, version 1.6 VF requests */ #define IXGBE_VF_GET_PF_LINK_STATE 0x11 /* request PF to send link info */ +/* mailbox API, version 1.7 VF requests */ +#define IXGBE_VF_FEATURES_NEGOTIATE 0x12 /* get features supported by PF */ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ @@ -101,6 +105,12 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */ #define IXGBE_VF_MBX_INIT_DELAY 500 /* microseconds between retries */ +/* features negotiated between PF/VF */ +#define IXGBEVF_PF_SUP_IPSEC BIT(0) +#define IXGBEVF_PF_SUP_ESX_MBX BIT(1) + +#define IXGBE_SUPPORTED_FEATURES IXGBEVF_PF_SUP_IPSEC + struct ixgbe_hw; int ixgbe_read_mbx(struct ixgbe_hw *, u32 *, u16, u16); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index b09271d61a4e..ee133d6749b3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -511,6 +511,7 @@ static int ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 max_frame, u32 vf case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: /* Version 1.1 supports jumbo frames on VFs if PF has * jumbo frames enabled which means legacy VFs are * disabled @@ -1048,6 +1049,7 @@ static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: adapter->vfinfo[vf].vf_api = api; return 0; default: @@ -1075,6 +1077,7 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: break; default: return -1; @@ -1115,6 +1118,7 @@ static int ixgbe_get_vf_reta(struct ixgbe_adapter *adapter, u32 
*msgbuf, u32 vf) /* verify the PF is supporting the correct API */ switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_17: case ixgbe_mbox_api_16: case ixgbe_mbox_api_14: case ixgbe_mbox_api_13: @@ -1149,6 +1153,7 @@ static int ixgbe_get_vf_rss_key(struct ixgbe_adapter *adapter, /* verify the PF is supporting the correct API */ switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_17: case ixgbe_mbox_api_16: case ixgbe_mbox_api_14: case ixgbe_mbox_api_13: @@ -1180,6 +1185,7 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: break; default: return -EOPNOTSUPP; @@ -1251,6 +1257,7 @@ static int ixgbe_get_vf_link_state(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: break; default: return -EOPNOTSUPP; @@ -1278,6 +1285,7 @@ static int ixgbe_send_vf_link_status(struct ixgbe_adapter *adapter, switch (adapter->vfinfo[vf].vf_api) { case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: if (hw->mac.type != ixgbe_mac_e610) return -EOPNOTSUPP; break; @@ -1293,6 +1301,32 @@ static int ixgbe_send_vf_link_status(struct ixgbe_adapter *adapter, return 0; } +/** + * ixgbe_negotiate_vf_features - negotiate supported features with VF driver + * @adapter: pointer to adapter struct + * @msgbuf: pointer to message buffers + * @vf: VF identifier + * + * Return: 0 on success or -EOPNOTSUPP when operation is not supported. + */ +static int ixgbe_negotiate_vf_features(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + u32 features = msgbuf[1]; + + switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_17: + break; + default: + return -EOPNOTSUPP; + } + + features &= IXGBE_SUPPORTED_FEATURES; + msgbuf[1] = features; + + return 0; +} + static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) { u32 mbx_size = IXGBE_VFMAILBOX_SIZE; @@ -1370,6 +1404,9 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) case IXGBE_VF_GET_PF_LINK_STATE: retval = ixgbe_send_vf_link_status(adapter, msgbuf, vf); break; + case IXGBE_VF_FEATURES_NEGOTIATE: + retval = ixgbe_negotiate_vf_features(adapter, msgbuf, vf); + break; default: e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]); retval = -EIO; -- cgit v1.2.3 From 5feef67b646d8f5064bac288e22204ffba2b9a4a Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Thu, 9 Oct 2025 17:03:51 -0700 Subject: ixgbe: fix too early devlink_free() in ixgbe_remove() Since ixgbe_adapter is embedded in devlink, calling devlink_free() prematurely in the ixgbe_remove() path can lead to UAF. Move devlink_free() to the end. KASAN report: BUG: KASAN: use-after-free in ixgbe_reset_interrupt_capability+0x140/0x180 [ixgbe] Read of size 8 at addr ffff0000adf813e0 by task bash/2095 CPU: 1 UID: 0 PID: 2095 Comm: bash Tainted: G S 6.17.0-rc2-tnguy.net-queue+ #1 PREEMPT(full) [...] 
Call trace: show_stack+0x30/0x90 (C) dump_stack_lvl+0x9c/0xd0 print_address_description.constprop.0+0x90/0x310 print_report+0x104/0x1f0 kasan_report+0x88/0x180 __asan_report_load8_noabort+0x20/0x30 ixgbe_reset_interrupt_capability+0x140/0x180 [ixgbe] ixgbe_clear_interrupt_scheme+0xf8/0x130 [ixgbe] ixgbe_remove+0x2d0/0x8c0 [ixgbe] pci_device_remove+0xa0/0x220 device_remove+0xb8/0x170 device_release_driver_internal+0x318/0x490 device_driver_detach+0x40/0x68 unbind_store+0xec/0x118 drv_attr_store+0x64/0xb8 sysfs_kf_write+0xcc/0x138 kernfs_fop_write_iter+0x294/0x440 new_sync_write+0x1fc/0x588 vfs_write+0x480/0x6a0 ksys_write+0xf0/0x1e0 __arm64_sys_write+0x70/0xc0 invoke_syscall.constprop.0+0xcc/0x280 el0_svc_common.constprop.0+0xa8/0x248 do_el0_svc+0x44/0x68 el0_svc+0x54/0x160 el0t_64_sync_handler+0xa0/0xe8 el0t_64_sync+0x1b0/0x1b8 Fixes: a0285236ab93 ("ixgbe: add initial devlink support") Signed-off-by: Koichiro Den Tested-by: Rinitha S Reviewed-by: Jedrzej Jagielski Reviewed-by: Aleksandr Loktionov Reviewed-by: Paul Menzel Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-6-ef32a425b92a@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 90d4e57b1c93..ca1ccc630001 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -12101,7 +12101,6 @@ static void ixgbe_remove(struct pci_dev *pdev) devl_port_unregister(&adapter->devlink_port); devl_unlock(adapter->devlink); - devlink_free(adapter->devlink); ixgbe_stop_ipsec_offload(adapter); ixgbe_clear_interrupt_scheme(adapter); @@ -12137,6 +12136,8 @@ static void ixgbe_remove(struct pci_dev *pdev) if (disable_dev) pci_disable_device(pdev); + + devlink_free(adapter->devlink); } /** -- cgit v1.2.3 From 2c67301584f2671e320236df6bbe75ae09feb4d0 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 11 Oct 2025 13:02:49 +0200 Subject: net: phy: realtek: Avoid PHYCR2 access if PHYCR2 not present The driver is currently checking for PHYCR2 register presence in rtl8211f_config_init(), but it does so after accessing PHYCR2 to disable EEE. This was introduced in commit bfc17c165835 ("net: phy: realtek: disable PHY-mode EEE"). Move the PHYCR2 presence test before the EEE disablement and simplify the code. 
Fixes: bfc17c165835 ("net: phy: realtek: disable PHY-mode EEE") Signed-off-by: Marek Vasut Reviewed-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20251011110309.12664-1-marek.vasut@mailbox.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek/realtek_main.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 82d8e1335215..a724b21b4fe7 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -633,26 +633,25 @@ static int rtl8211f_config_init(struct phy_device *phydev) str_enabled_disabled(val_rxdly)); } + if (!priv->has_phycr2) + return 0; + /* Disable PHY-mode EEE so LPI is passed to the MAC */ ret = phy_modify_paged(phydev, RTL8211F_PHYCR_PAGE, RTL8211F_PHYCR2, RTL8211F_PHYCR2_PHY_EEE_ENABLE, 0); if (ret) return ret; - if (priv->has_phycr2) { - ret = phy_modify_paged(phydev, RTL8211F_PHYCR_PAGE, - RTL8211F_PHYCR2, RTL8211F_CLKOUT_EN, - priv->phycr2); - if (ret < 0) { - dev_err(dev, "clkout configuration failed: %pe\n", - ERR_PTR(ret)); - return ret; - } - - return genphy_soft_reset(phydev); + ret = phy_modify_paged(phydev, RTL8211F_PHYCR_PAGE, + RTL8211F_PHYCR2, RTL8211F_CLKOUT_EN, + priv->phycr2); + if (ret < 0) { + dev_err(dev, "clkout configuration failed: %pe\n", + ERR_PTR(ret)); + return ret; } - return 0; + return genphy_soft_reset(phydev); } static int rtl821x_suspend(struct phy_device *phydev) -- cgit v1.2.3 From c065b6046b3493a878c2ceb810aed845431badb4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 13 Oct 2025 21:50:40 -0400 Subject: Use CONFIG_EXT4_FS instead of CONFIG_EXT3_FS in all of the defconfigs Commit d6ace46c82fd ("ext4: remove obsolete EXT3 config options") removed the obsolete EXT3 config options, since it had been over a decade since fs/ext3 had been removed. Unfortunately, there were a number of defconfigs that still used CONFIG_EXT3_FS, which the cleanup commit didn't fix up. This caused a large number of defconfig test builds to fail. Oops.
Fixes: d6ace46c82fd ("ext4: remove obsolete EXT3 config options") Signed-off-by: Theodore Ts'o --- arch/arc/configs/axs101_defconfig | 2 +- arch/arc/configs/axs103_defconfig | 2 +- arch/arc/configs/axs103_smp_defconfig | 2 +- arch/arc/configs/hsdk_defconfig | 2 +- arch/arc/configs/vdk_hs38_defconfig | 2 +- arch/arc/configs/vdk_hs38_smp_defconfig | 2 +- arch/arm/configs/axm55xx_defconfig | 2 +- arch/arm/configs/bcm2835_defconfig | 4 ++-- arch/arm/configs/davinci_all_defconfig | 2 +- arch/arm/configs/dove_defconfig | 4 ++-- arch/arm/configs/ep93xx_defconfig | 4 ++-- arch/arm/configs/imx_v6_v7_defconfig | 6 +++--- arch/arm/configs/ixp4xx_defconfig | 4 ++-- arch/arm/configs/mmp2_defconfig | 2 +- arch/arm/configs/moxart_defconfig | 2 +- arch/arm/configs/multi_v5_defconfig | 2 +- arch/arm/configs/mv78xx0_defconfig | 4 ++-- arch/arm/configs/mvebu_v5_defconfig | 2 +- arch/arm/configs/nhk8815_defconfig | 2 +- arch/arm/configs/omap1_defconfig | 2 +- arch/arm/configs/omap2plus_defconfig | 2 +- arch/arm/configs/orion5x_defconfig | 4 ++-- arch/arm/configs/pxa_defconfig | 6 +++--- arch/arm/configs/qcom_defconfig | 2 +- arch/arm/configs/rpc_defconfig | 2 +- arch/arm/configs/s3c6400_defconfig | 6 +++--- arch/arm/configs/sama7_defconfig | 2 +- arch/arm/configs/socfpga_defconfig | 2 +- arch/arm/configs/spear13xx_defconfig | 4 ++-- arch/arm/configs/spear3xx_defconfig | 4 ++-- arch/arm/configs/spear6xx_defconfig | 4 ++-- arch/arm/configs/spitz_defconfig | 4 ++-- arch/arm/configs/stm32_defconfig | 2 +- arch/arm/configs/tegra_defconfig | 6 +++--- arch/arm/configs/u8500_defconfig | 2 +- arch/arm/configs/vexpress_defconfig | 2 +- arch/hexagon/configs/comet_defconfig | 6 +++--- arch/loongarch/configs/loongson3_defconfig | 6 +++--- arch/m68k/configs/stmark2_defconfig | 6 +++--- arch/microblaze/configs/mmu_defconfig | 2 +- arch/mips/configs/bigsur_defconfig | 6 +++--- arch/mips/configs/cobalt_defconfig | 6 +++--- arch/mips/configs/decstation_64_defconfig | 6 +++--- arch/mips/configs/decstation_defconfig | 6 +++--- arch/mips/configs/decstation_r4k_defconfig | 6 +++--- arch/mips/configs/fuloong2e_defconfig | 2 +- arch/mips/configs/ip22_defconfig | 6 +++--- arch/mips/configs/ip27_defconfig | 6 +++--- arch/mips/configs/ip28_defconfig | 6 +++--- arch/mips/configs/ip30_defconfig | 6 +++--- arch/mips/configs/ip32_defconfig | 6 +++--- arch/mips/configs/jazz_defconfig | 2 +- arch/mips/configs/lemote2f_defconfig | 6 +++--- arch/mips/configs/loongson1b_defconfig | 6 +++--- arch/mips/configs/loongson1c_defconfig | 6 +++--- arch/mips/configs/loongson2k_defconfig | 6 +++--- arch/mips/configs/loongson3_defconfig | 6 +++--- arch/mips/configs/malta_defconfig | 2 +- arch/mips/configs/malta_kvm_defconfig | 2 +- arch/mips/configs/malta_qemu_32r6_defconfig | 2 +- arch/mips/configs/maltaaprp_defconfig | 2 +- arch/mips/configs/maltasmvp_defconfig | 6 +++--- arch/mips/configs/maltasmvp_eva_defconfig | 2 +- arch/mips/configs/maltaup_defconfig | 2 +- arch/mips/configs/maltaup_xpa_defconfig | 2 +- arch/mips/configs/mtx1_defconfig | 6 +++--- arch/mips/configs/rm200_defconfig | 2 +- arch/openrisc/configs/or1klitex_defconfig | 2 +- arch/openrisc/configs/virt_defconfig | 4 ++-- arch/parisc/configs/generic-32bit_defconfig | 4 ++-- arch/parisc/configs/generic-64bit_defconfig | 4 ++-- arch/sh/configs/ap325rxa_defconfig | 6 +++--- arch/sh/configs/apsh4a3a_defconfig | 2 +- arch/sh/configs/apsh4ad0a_defconfig | 2 +- arch/sh/configs/ecovec24_defconfig | 6 +++--- arch/sh/configs/edosk7760_defconfig | 2 +- arch/sh/configs/espt_defconfig | 2 +- 
arch/sh/configs/landisk_defconfig | 2 +- arch/sh/configs/lboxre2_defconfig | 2 +- arch/sh/configs/magicpanelr2_defconfig | 4 ++-- arch/sh/configs/r7780mp_defconfig | 2 +- arch/sh/configs/r7785rp_defconfig | 2 +- arch/sh/configs/rsk7264_defconfig | 2 +- arch/sh/configs/rsk7269_defconfig | 2 +- arch/sh/configs/sdk7780_defconfig | 4 ++-- arch/sh/configs/sdk7786_defconfig | 2 +- arch/sh/configs/se7343_defconfig | 2 +- arch/sh/configs/se7712_defconfig | 2 +- arch/sh/configs/se7721_defconfig | 2 +- arch/sh/configs/se7722_defconfig | 2 +- arch/sh/configs/se7724_defconfig | 6 +++--- arch/sh/configs/sh03_defconfig | 4 ++-- arch/sh/configs/sh2007_defconfig | 2 +- arch/sh/configs/sh7757lcr_defconfig | 2 +- arch/sh/configs/sh7763rdp_defconfig | 2 +- arch/sh/configs/sh7785lcr_32bit_defconfig | 2 +- arch/sh/configs/sh7785lcr_defconfig | 2 +- arch/sh/configs/shx3_defconfig | 2 +- arch/sh/configs/titan_defconfig | 4 ++-- arch/sh/configs/ul2_defconfig | 2 +- arch/sh/configs/urquell_defconfig | 2 +- arch/sparc/configs/sparc64_defconfig | 6 +++--- arch/xtensa/configs/audio_kc705_defconfig | 2 +- arch/xtensa/configs/cadence_csp_defconfig | 2 +- arch/xtensa/configs/generic_kc705_defconfig | 2 +- arch/xtensa/configs/nommu_kc705_defconfig | 2 +- arch/xtensa/configs/smp_lx200_defconfig | 2 +- arch/xtensa/configs/virt_defconfig | 2 +- arch/xtensa/configs/xip_kc705_defconfig | 2 +- 109 files changed, 182 insertions(+), 182 deletions(-) diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index a7cd526dd7ca..f930396d9dae 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -88,7 +88,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index afa6a348f444..6b779dee5ea0 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -86,7 +86,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 2bfa6371953c..a89b50d5369d 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -88,7 +88,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 1558e8e87767..1b8b2a098cda 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -77,7 +77,7 @@ CONFIG_DMADEVICES=y CONFIG_DW_AXI_DMAC=y CONFIG_IIO=y CONFIG_TI_ADC108S102=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index 03d9ac20baa9..b7120523e09a 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -74,7 +74,7 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_SERIAL=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig 
index c09488992f13..4077abd5980c 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -81,7 +81,7 @@ CONFIG_MMC_DW=y CONFIG_UIO=y CONFIG_UIO_PDRV_GENIRQ=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arm/configs/axm55xx_defconfig b/arch/arm/configs/axm55xx_defconfig index 516689dc6cf1..9b263ea9a878 100644 --- a/arch/arm/configs/axm55xx_defconfig +++ b/arch/arm/configs/axm55xx_defconfig @@ -194,7 +194,7 @@ CONFIG_MAILBOX=y CONFIG_PL320_MBOX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig index 27dc3bf6b124..4a8ac09843d7 100644 --- a/arch/arm/configs/bcm2835_defconfig +++ b/arch/arm/configs/bcm2835_defconfig @@ -154,8 +154,8 @@ CONFIG_PWM_BCM2835=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_FANOTIFY=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index e2ddaca0f89d..673408a10888 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -228,7 +228,7 @@ CONFIG_PWM=y CONFIG_PWM_TIECAP=m CONFIG_PWM_TIEHRPWM=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_XFS_FS=m CONFIG_AUTOFS_FS=m diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig index d76eb12d29a7..bb6c4748bfc8 100644 --- a/arch/arm/configs/dove_defconfig +++ b/arch/arm/configs/dove_defconfig @@ -95,8 +95,8 @@ CONFIG_RTC_DRV_MV=y CONFIG_DMADEVICES=y CONFIG_MV_XOR=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig index 2248afaf35b5..7f3756d8b086 100644 --- a/arch/arm/configs/ep93xx_defconfig +++ b/arch/arm/configs/ep93xx_defconfig @@ -103,8 +103,8 @@ CONFIG_RTC_DRV_EP93XX=y CONFIG_DMADEVICES=y CONFIG_EP93XX_DMA=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 9a57763a8d38..0d55056c6f82 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -436,9 +436,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=y diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig index 3cb995b9616a..81199dddcde7 100644 --- a/arch/arm/configs/ixp4xx_defconfig +++ b/arch/arm/configs/ixp4xx_defconfig @@ -158,8 +158,8 @@ CONFIG_IXP4XX_NPE=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_OVERLAY_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git 
a/arch/arm/configs/mmp2_defconfig b/arch/arm/configs/mmp2_defconfig index 842a989baa27..f67e9cda73e2 100644 --- a/arch/arm/configs/mmp2_defconfig +++ b/arch/arm/configs/mmp2_defconfig @@ -53,7 +53,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_MAX8925=y # CONFIG_RESET_CONTROLLER is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig index fa06d98e43fc..e2d9f3610063 100644 --- a/arch/arm/configs/moxart_defconfig +++ b/arch/arm/configs/moxart_defconfig @@ -113,7 +113,7 @@ CONFIG_RTC_DRV_MOXART=y CONFIG_DMADEVICES=y CONFIG_MOXART_DMA=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_TMPFS=y CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig index b523bc246c09..59b020e66a0b 100644 --- a/arch/arm/configs/multi_v5_defconfig +++ b/arch/arm/configs/multi_v5_defconfig @@ -268,7 +268,7 @@ CONFIG_PWM_ATMEL=m CONFIG_PWM_ATMEL_HLCDC_PWM=m CONFIG_PWM_ATMEL_TCB=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_UDF_FS=m diff --git a/arch/arm/configs/mv78xx0_defconfig b/arch/arm/configs/mv78xx0_defconfig index 3343f72de7ea..55f4ab67a306 100644 --- a/arch/arm/configs/mv78xx0_defconfig +++ b/arch/arm/configs/mv78xx0_defconfig @@ -91,8 +91,8 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_M41T80=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/arm/configs/mvebu_v5_defconfig b/arch/arm/configs/mvebu_v5_defconfig index 23dbb80fcc2e..d1742a7cae6a 100644 --- a/arch/arm/configs/mvebu_v5_defconfig +++ b/arch/arm/configs/mvebu_v5_defconfig @@ -168,7 +168,7 @@ CONFIG_MV_XOR=y CONFIG_STAGING=y CONFIG_FB_XGI=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_UDF_FS=m diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig index ea28ed8991b4..696b4fbc2412 100644 --- a/arch/arm/configs/nhk8815_defconfig +++ b/arch/arm/configs/nhk8815_defconfig @@ -116,7 +116,7 @@ CONFIG_IIO_ST_ACCEL_3AXIS=y CONFIG_PWM=y CONFIG_PWM_STMPE=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FUSE_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig index 661e5d6894bd..24c54bf1e243 100644 --- a/arch/arm/configs/omap1_defconfig +++ b/arch/arm/configs/omap1_defconfig @@ -184,7 +184,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_OMAP=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 939913ed9a73..8f443c20872b 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -679,7 +679,7 @@ CONFIG_TWL4030_USB=m CONFIG_COUNTER=m CONFIG_TI_EQEP=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS_SECURITY=y CONFIG_FANOTIFY=y CONFIG_QUOTA=y diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig index 62b9c6102789..c28426250ec3 100644 --- a/arch/arm/configs/orion5x_defconfig +++ b/arch/arm/configs/orion5x_defconfig @@ -115,8 +115,8 @@ CONFIG_RTC_DRV_M48T86=y CONFIG_DMADEVICES=y 
CONFIG_MV_XOR=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 1a80602c1284..4b988a9e2768 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -580,9 +580,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig index ec52ccece0ca..5a0513290547 100644 --- a/arch/arm/configs/qcom_defconfig +++ b/arch/arm/configs/qcom_defconfig @@ -295,7 +295,7 @@ CONFIG_INTERCONNECT_QCOM_MSM8974=m CONFIG_INTERCONNECT_QCOM_SDX55=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FUSE_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/rpc_defconfig b/arch/arm/configs/rpc_defconfig index 24f1fa868230..46df453e224e 100644 --- a/arch/arm/configs/rpc_defconfig +++ b/arch/arm/configs/rpc_defconfig @@ -77,7 +77,7 @@ CONFIG_SOUND_VIDC=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_PCF8583=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/arm/configs/s3c6400_defconfig b/arch/arm/configs/s3c6400_defconfig index a37e6ac40825..4c635818973c 100644 --- a/arch/arm/configs/s3c6400_defconfig +++ b/arch/arm/configs/s3c6400_defconfig @@ -53,9 +53,9 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_S3C=y CONFIG_PWM=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_CRAMFS=y diff --git a/arch/arm/configs/sama7_defconfig b/arch/arm/configs/sama7_defconfig index e14720a9a5ac..e2ad9a05566f 100644 --- a/arch/arm/configs/sama7_defconfig +++ b/arch/arm/configs/sama7_defconfig @@ -201,7 +201,7 @@ CONFIG_MCHP_EIC=y CONFIG_RESET_CONTROLLER=y CONFIG_NVMEM_MICROCHIP_OTPC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_AUTOFS_FS=m CONFIG_VFAT_FS=y diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig index 294906c8f16e..f2e42846b116 100644 --- a/arch/arm/configs/socfpga_defconfig +++ b/arch/arm/configs/socfpga_defconfig @@ -136,7 +136,7 @@ CONFIG_FPGA_REGION=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arm/configs/spear13xx_defconfig b/arch/arm/configs/spear13xx_defconfig index a8f992fdb30d..8b19af1ea67c 100644 --- a/arch/arm/configs/spear13xx_defconfig +++ b/arch/arm/configs/spear13xx_defconfig @@ -84,8 +84,8 @@ CONFIG_DMATEST=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=y CONFIG_MSDOS_FS=m diff --git a/arch/arm/configs/spear3xx_defconfig b/arch/arm/configs/spear3xx_defconfig index 8dc5a388759c..b4e4b96a98af 100644 --- a/arch/arm/configs/spear3xx_defconfig +++ b/arch/arm/configs/spear3xx_defconfig @@ -67,8 +67,8 @@ CONFIG_DMATEST=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y 
CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m diff --git a/arch/arm/configs/spear6xx_defconfig b/arch/arm/configs/spear6xx_defconfig index 4e9e1a6ff381..7083b1bd8573 100644 --- a/arch/arm/configs/spear6xx_defconfig +++ b/arch/arm/configs/spear6xx_defconfig @@ -53,8 +53,8 @@ CONFIG_DMATEST=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index ac2a0f998c73..395df2f9dc8e 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -193,8 +193,8 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig index dcd9c316072e..82190b155b14 100644 --- a/arch/arm/configs/stm32_defconfig +++ b/arch/arm/configs/stm32_defconfig @@ -69,7 +69,7 @@ CONFIG_STM32_MDMA=y CONFIG_IIO=y CONFIG_STM32_ADC_CORE=y CONFIG_STM32_ADC=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index ba863b445417..ab477ca13f89 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -319,9 +319,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_DNOTIFY is not set CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig index 0f55815eecb3..f3bc967a0f25 100644 --- a/arch/arm/configs/u8500_defconfig +++ b/arch/arm/configs/u8500_defconfig @@ -175,7 +175,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig index cdb6065e04fd..b9454f6954f8 100644 --- a/arch/arm/configs/vexpress_defconfig +++ b/arch/arm/configs/vexpress_defconfig @@ -120,7 +120,7 @@ CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig index c6108f000288..b132752693a9 100644 --- a/arch/hexagon/configs/comet_defconfig +++ b/arch/hexagon/configs/comet_defconfig @@ -46,10 +46,10 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 
34eaee0384c9..f3e2c588e5df 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -929,9 +929,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig index 7787a4dd7c3c..f3268fed02fc 100644 --- a/arch/m68k/configs/stmark2_defconfig +++ b/arch/m68k/configs/stmark2_defconfig @@ -72,9 +72,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index 176314f3c9aa..fbbdcb394ca2 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -73,7 +73,7 @@ CONFIG_FB_XILINX=y CONFIG_UIO=y CONFIG_UIO_PDRV_GENIRQ=y CONFIG_UIO_DMEM_GENIRQ=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_TMPFS=y CONFIG_CRAMFS=y diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index 97d2cd997285..349e9e0b4f54 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -144,9 +144,9 @@ CONFIG_EXT2_FS=m CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=m +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig index b0b551efac7c..6ee9ee391fdc 100644 --- a/arch/mips/configs/cobalt_defconfig +++ b/arch/mips/configs/cobalt_defconfig @@ -59,9 +59,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig index 85a4472cb058..52a63dd7aac7 100644 --- a/arch/mips/configs/decstation_64_defconfig +++ b/arch/mips/configs/decstation_64_defconfig @@ -133,9 +133,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig index a3b2c8da2dde..59fb7ee5eeb0 100644 --- a/arch/mips/configs/decstation_defconfig +++ b/arch/mips/configs/decstation_defconfig @@ -129,9 +129,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y 
CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig index a476717b8a6a..8be1cb433e95 100644 --- a/arch/mips/configs/decstation_r4k_defconfig +++ b/arch/mips/configs/decstation_r4k_defconfig @@ -129,9 +129,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index cdedbb8a8f53..b6fe3c962464 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -173,7 +173,7 @@ CONFIG_USB_ISIGHTFW=m CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=y diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index 2decf8b98d31..e123848f94ab 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -232,9 +232,9 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_RTC_DRV_DS1286=y CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_QUOTA=y diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 5d079941fd20..1c10242b148b 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -272,9 +272,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/ip28_defconfig b/arch/mips/configs/ip28_defconfig index 6db21e498faa..755cbf20f5a5 100644 --- a/arch/mips/configs/ip28_defconfig +++ b/arch/mips/configs/ip28_defconfig @@ -49,9 +49,9 @@ CONFIG_WATCHDOG=y CONFIG_INDYDOG=y # CONFIG_VGA_CONSOLE is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_PROC_KCORE=y # CONFIG_PROC_PAGE_MONITOR is not set diff --git a/arch/mips/configs/ip30_defconfig b/arch/mips/configs/ip30_defconfig index a4524e785469..718f3060d9fa 100644 --- a/arch/mips/configs/ip30_defconfig +++ b/arch/mips/configs/ip30_defconfig @@ -143,9 +143,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index d8ac11427f69..7568838eb08b 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -89,9 +89,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m 
diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index 65adb538030d..a790c2610fd3 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -69,7 +69,7 @@ CONFIG_FB_G364=y CONFIG_FRAMEBUFFER_CONSOLE=y # CONFIG_HWMON is not set CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 5038a27d035f..8d3f20ed19b5 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -226,9 +226,9 @@ CONFIG_MMC=m CONFIG_LEDS_CLASS=y CONFIG_STAGING=y CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_XFS_FS=m diff --git a/arch/mips/configs/loongson1b_defconfig b/arch/mips/configs/loongson1b_defconfig index 68207b31dc20..a64a39447963 100644 --- a/arch/mips/configs/loongson1b_defconfig +++ b/arch/mips/configs/loongson1b_defconfig @@ -94,9 +94,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_DNOTIFY is not set CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/loongson1c_defconfig b/arch/mips/configs/loongson1c_defconfig index c3910a9dee9e..86d7f64a164a 100644 --- a/arch/mips/configs/loongson1c_defconfig +++ b/arch/mips/configs/loongson1c_defconfig @@ -95,9 +95,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_DNOTIFY is not set CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/loongson2k_defconfig b/arch/mips/configs/loongson2k_defconfig index 0cc665d3ea34..aec1fd1902eb 100644 --- a/arch/mips/configs/loongson2k_defconfig +++ b/arch/mips/configs/loongson2k_defconfig @@ -298,9 +298,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 240efff37d98..575aaf242361 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -348,9 +348,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_POSIX_ACL=y CONFIG_QUOTA=y diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 9fcbac829920..81704ec67f09 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -313,7 +313,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index 
19102386a81c..82a97f58bce1 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -319,7 +319,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig index 1b98f6945c2d..accb471a1d93 100644 --- a/arch/mips/configs/malta_qemu_32r6_defconfig +++ b/arch/mips/configs/malta_qemu_32r6_defconfig @@ -148,7 +148,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig index 7b8905cb3400..6bda67c5f68f 100644 --- a/arch/mips/configs/maltaaprp_defconfig +++ b/arch/mips/configs/maltaaprp_defconfig @@ -149,7 +149,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig index 8249f6a51895..e4082537f80f 100644 --- a/arch/mips/configs/maltasmvp_defconfig +++ b/arch/mips/configs/maltasmvp_defconfig @@ -148,9 +148,9 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig index 21cb37668763..58f5af45fa98 100644 --- a/arch/mips/configs/maltasmvp_eva_defconfig +++ b/arch/mips/configs/maltasmvp_eva_defconfig @@ -152,7 +152,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig index 3df9cd669683..9bfef7de0d1c 100644 --- a/arch/mips/configs/maltaup_defconfig +++ b/arch/mips/configs/maltaup_defconfig @@ -148,7 +148,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 1dd07c9d1812..0f9ef20744f9 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -319,7 +319,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index e4bcdb64df6c..b4dc2255a81c 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -594,9 +594,9 @@ CONFIG_EXT2_FS=m CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=m +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=m diff --git a/arch/mips/configs/rm200_defconfig 
b/arch/mips/configs/rm200_defconfig index 39a2419e1f3e..b507dc4dddd4 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -307,7 +307,7 @@ CONFIG_USB_SISUSBVGA=m CONFIG_USB_LD=m CONFIG_USB_TEST=m CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/openrisc/configs/or1klitex_defconfig b/arch/openrisc/configs/or1klitex_defconfig index 3e849d25838a..fb1eb9a68bd6 100644 --- a/arch/openrisc/configs/or1klitex_defconfig +++ b/arch/openrisc/configs/or1klitex_defconfig @@ -38,7 +38,7 @@ CONFIG_MMC_LITEX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_LITEX_SOC_CONTROLLER=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_EXFAT_FS=y diff --git a/arch/openrisc/configs/virt_defconfig b/arch/openrisc/configs/virt_defconfig index c1b69166c500..01d685f3fb17 100644 --- a/arch/openrisc/configs/virt_defconfig +++ b/arch/openrisc/configs/virt_defconfig @@ -94,8 +94,8 @@ CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 94928d114d4c..52031bde9f17 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -232,8 +232,8 @@ CONFIG_AUXDISPLAY=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V2=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index d8cd7f858b2a..1aec04c09d0b 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -251,8 +251,8 @@ CONFIG_STAGING=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_BTRFS_FS=m CONFIG_QUOTA=y diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index b6f36c938f1d..336dbacd89bd 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -81,10 +81,10 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/apsh4a3a_defconfig b/arch/sh/configs/apsh4a3a_defconfig index 9c2644443c4d..59daf99ea745 100644 --- a/arch/sh/configs/apsh4a3a_defconfig +++ b/arch/sh/configs/apsh4a3a_defconfig @@ -60,7 +60,7 @@ CONFIG_FONT_8x16=y CONFIG_LOGO=y # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index 137573610ec4..df2a669ea9d8 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -88,7 +88,7 @@ CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y 
-CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig index e76694aace25..dd7e54c451d6 100644 --- a/arch/sh/configs/ecovec24_defconfig +++ b/arch/sh/configs/ecovec24_defconfig @@ -109,10 +109,10 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig index f427a95bcd21..711db47b65ba 100644 --- a/arch/sh/configs/edosk7760_defconfig +++ b/arch/sh/configs/edosk7760_defconfig @@ -87,7 +87,7 @@ CONFIG_SND_SOC=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index da176f100e00..f8cad1e7a333 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -59,7 +59,7 @@ CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_AUTOFS_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index 924bb3233b0b..08342ceee32e 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -93,7 +93,7 @@ CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m CONFIG_USB_SISUSBVGA=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_ISO9660_FS=m CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index 0307bb2be79f..96a21173522d 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -49,7 +49,7 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_RTC_CLASS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig index 93b9aa32dc7c..af7f777b20be 100644 --- a/arch/sh/configs/magicpanelr2_defconfig +++ b/arch/sh/configs/magicpanelr2_defconfig @@ -64,9 +64,9 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_HCTOSYS is not set CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -# CONFIG_EXT3_FS_XATTR is not set +# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index f28b8c4181c2..11f210517f76 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -74,7 +74,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index 3a4239f20ff1..ae367d7a14a8 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -69,7 +69,7 @@ 
CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig index e4ef259425c4..3aba0102304f 100644 --- a/arch/sh/configs/rsk7264_defconfig +++ b/arch/sh/configs/rsk7264_defconfig @@ -59,7 +59,7 @@ CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_VFAT_FS=y CONFIG_NFS_FS=y diff --git a/arch/sh/configs/rsk7269_defconfig b/arch/sh/configs/rsk7269_defconfig index e0d1560b2bfd..f82f280fc55a 100644 --- a/arch/sh/configs/rsk7269_defconfig +++ b/arch/sh/configs/rsk7269_defconfig @@ -43,7 +43,7 @@ CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_VFAT_FS=y CONFIG_NFS_FS=y diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index 9870d16d9711..3b51195bf1b5 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -102,9 +102,9 @@ CONFIG_LEDS_CLASS=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=y CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index 07894f13441e..ebb3f4420ae8 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -161,7 +161,7 @@ CONFIG_STAGING=y # CONFIG_STAGING_EXCLUDE_BUILD is not set CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_XFS_FS=y diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig index 75db12fb9ad1..6ef546ee8a32 100644 --- a/arch/sh/configs/se7343_defconfig +++ b/arch/sh/configs/se7343_defconfig @@ -84,7 +84,7 @@ CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_ISP116X_HCD=y CONFIG_UIO=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_DNOTIFY is not set CONFIG_JFFS2_FS=y diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index 8770a72e6a63..4cecf9c06a65 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -83,7 +83,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_DNOTIFY is not set CONFIG_JFFS2_FS=y diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index b15c6406a0e8..c28057b70ad7 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -107,7 +107,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/se7722_defconfig b/arch/sh/configs/se7722_defconfig index 5327a2f70980..88bfd953ef89 100644 --- a/arch/sh/configs/se7722_defconfig +++ b/arch/sh/configs/se7722_defconfig @@ -44,7 +44,7 @@ CONFIG_HW_RANDOM=y CONFIG_RTC_CLASS=y 
CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig index 9501e69eb886..e1b2616ef921 100644 --- a/arch/sh/configs/se7724_defconfig +++ b/arch/sh/configs/se7724_defconfig @@ -110,10 +110,10 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index 4d75c92cac10..306e1661fbf5 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -57,9 +57,9 @@ CONFIG_WATCHDOG=y CONFIG_SH_WDT=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index cc6292b3235a..889daa5d2faa 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -95,7 +95,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_DMADEVICES=y CONFIG_TIMB_DMA=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_ZISOFS=y diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig index 48a0f9beb116..25e9d22779b3 100644 --- a/arch/sh/configs/sh7757lcr_defconfig +++ b/arch/sh/configs/sh7757lcr_defconfig @@ -64,7 +64,7 @@ CONFIG_MMC=y CONFIG_MMC_SDHI=y CONFIG_MMC_SH_MMCIF=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index b77b3313157e..85ec00b7dbd2 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -61,7 +61,7 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_AUTOFS_FS=y CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig index 44f9b2317f09..e860a20d3f0f 100644 --- a/arch/sh/configs/sh7785lcr_32bit_defconfig +++ b/arch/sh/configs/sh7785lcr_32bit_defconfig @@ -113,7 +113,7 @@ CONFIG_RTC_DRV_RS5C372=y CONFIG_DMADEVICES=y CONFIG_UIO=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig index aec74b0e7003..33c98b4a2adb 100644 --- a/arch/sh/configs/sh7785lcr_defconfig +++ b/arch/sh/configs/sh7785lcr_defconfig @@ -90,7 +90,7 @@ CONFIG_USB_TEST=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index 9a0df5ea3866..3169e4dc7004 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -84,7 +84,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_UIO=m CONFIG_EXT2_FS=y 
-CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 8ef72b8dbcd3..6bff00038072 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -215,9 +215,9 @@ CONFIG_USB_SERIAL_PL2303=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -# CONFIG_EXT3_FS_XATTR is not set +# CONFIG_EXT4_FS_XATTR is not set CONFIG_XFS_FS=m CONFIG_FUSE_FS=m CONFIG_ISO9660_FS=m diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig index 103b81ec1ffb..b89eb8f5cc5c 100644 --- a/arch/sh/configs/ul2_defconfig +++ b/arch/sh/configs/ul2_defconfig @@ -66,7 +66,7 @@ CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index 00ef62133b04..60cb716ef195 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -114,7 +114,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_RTC_DRV_GENERIC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_BTRFS_FS=y diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 7a7c4dec2925..200640b93e05 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -187,10 +187,10 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig index f2af1a32c9c7..dc942bbac69f 100644 --- a/arch/xtensa/configs/audio_kc705_defconfig +++ b/arch/xtensa/configs/audio_kc705_defconfig @@ -103,7 +103,7 @@ CONFIG_SND_SIMPLE_CARD=y # CONFIG_USB_SUPPORT is not set CONFIG_COMMON_CLK_CDCE706=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig index 88ed5284e21c..81a057f25f21 100644 --- a/arch/xtensa/configs/cadence_csp_defconfig +++ b/arch/xtensa/configs/cadence_csp_defconfig @@ -80,7 +80,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/xtensa/configs/generic_kc705_defconfig b/arch/xtensa/configs/generic_kc705_defconfig index 4427907becca..3ee7e1c56556 100644 --- a/arch/xtensa/configs/generic_kc705_defconfig +++ b/arch/xtensa/configs/generic_kc705_defconfig @@ -90,7 +90,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig index 5828228522ba..c6e96f0aa700 100644 --- 
a/arch/xtensa/configs/nommu_kc705_defconfig +++ b/arch/xtensa/configs/nommu_kc705_defconfig @@ -91,7 +91,7 @@ CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 326966ca7831..373d42b9e510 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -94,7 +94,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/virt_defconfig b/arch/xtensa/configs/virt_defconfig index e37048985b47..72628d31e87a 100644 --- a/arch/xtensa/configs/virt_defconfig +++ b/arch/xtensa/configs/virt_defconfig @@ -76,7 +76,7 @@ CONFIG_LOGO=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_INPUT=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/xtensa/configs/xip_kc705_defconfig b/arch/xtensa/configs/xip_kc705_defconfig index ee47438f9b51..5d6013ea70fc 100644 --- a/arch/xtensa/configs/xip_kc705_defconfig +++ b/arch/xtensa/configs/xip_kc705_defconfig @@ -82,7 +82,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y -- cgit v1.2.3 From ca88ecdce5f51874a7c151809bd2c936ee0d3805 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 8 Oct 2025 22:35:15 +0100 Subject: arm64: Revamp HCR_EL2.E2H RES1 detection We currently have two ways to identify CPUs that only implement FEAT_VHE and not FEAT_E2H0: - either they advertise it via ID_AA64MMFR4_EL1.E2H0, - or the HCR_EL2.E2H bit is RAO/WI However, there is a third category of "cpus" that fall between these two cases: on CPUs that do not implement FEAT_FGT, it is IMPDEF whether an access to ID_AA64MMFR4_EL1 can trap to EL2 when the register value is zero. A consequence of this is that on systems such as Neoverse V2, a NV guest cannot reliably detect that it is in a VHE-only configuration (E2H is writable, and ID_AA64MMFR4_EL1 is 0), despite the hypervisor's best effort to repaint the id register. Replace the RAO/WI test by a sequence that makes use of the VHE register remapping between EL1 and EL2 to detect this situation, and work out whether we get the VHE behaviour even after having set HCR_EL2.E2H to 0. This solves the NV problem, and provides a more reliable acid test for CPUs that do not completely follow the letter of the architecture while providing a RES1 behaviour for HCR_EL2.E2H. Suggested-by: Mark Rutland Acked-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Oliver Upton Tested-by: Jan Kotas Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/15A85F2B-1A0C-4FA7-9FE4-EEC2203CC09E@global.cadence.com --- arch/arm64/include/asm/el2_setup.h | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b37da3ee8529..99a7c0235e6d 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -24,22 +24,48 @@ * ID_AA64MMFR4_EL1.E2H0 < 0.
On such CPUs HCR_EL2.E2H is RES1, but it * can reset into an UNKNOWN state and might not read as 1 until it has * been initialized explicitly. - * - * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but - * don't advertise it (they predate this relaxation). - * * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H * indicating whether the CPU is running in E2H mode. */ mrs_s x1, SYS_ID_AA64MMFR4_EL1 sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH cmp x1, #0 - b.ge .LnVHE_\@ + b.lt .LnE2H0_\@ + /* + * Unfortunately, HCR_EL2.E2H can be RES1 even if not advertised + * as such via ID_AA64MMFR4_EL1.E2H0: + * + * - Fruity CPUs predate the !FEAT_E2H0 relaxation, and seem to + * have HCR_EL2.E2H implemented as RAO/WI. + * + * - On CPUs that lack FEAT_FGT, a hypervisor can't trap guest + * reads of ID_AA64MMFR4_EL1 to advertise !FEAT_E2H0. NV + * guests on these hosts can write to HCR_EL2.E2H without + * trapping to the hypervisor, but these writes have no + * functional effect. + * + * Handle both cases by checking for an essential VHE property + * (system register remapping) to decide whether we're + * effectively VHE-only or not. + */ + msr_hcr_el2 x0 // Setup HCR_EL2 as nVHE + isb + mov x1, #1 // Write something to FAR_EL1 + msr far_el1, x1 + isb + mov x1, #2 // Try to overwrite it via FAR_EL2 + msr far_el2, x1 + isb + mrs x1, far_el1 // If we see the latest write in FAR_EL1, + cmp x1, #2 // we can safely assume we are VHE only. + b.ne .LnVHE_\@ // Otherwise, we know that nVHE works. + +.LnE2H0_\@: orr x0, x0, #HCR_E2H -.LnVHE_\@: msr_hcr_el2 x0 isb +.LnVHE_\@: .endm .macro __init_el2_sctlr -- cgit v1.2.3 From 095232711f23179053ca26bcf046ca121a91a465 Mon Sep 17 00:00:00 2001 From: Francesco Valla Date: Fri, 3 Oct 2025 12:33:03 +0200 Subject: drm/draw: fix color truncation in drm_draw_fill24 The color parameter passed to drm_draw_fill24() was truncated to 16 bits, leading to an incorrect color drawn to the target iosys_map. Fix this behavior, widening the parameter to 32 bits. 
Fixes: 31fa2c1ca0b2 ("drm/panic: Move drawing functions to drm_draw") Signed-off-by: Francesco Valla Reviewed-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20251003-drm_draw_fill24_fix-v1-1-8fb7c1c2a893@valla.it Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_draw.c | 2 +- drivers/gpu/drm/drm_draw_internal.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_draw.c b/drivers/gpu/drm/drm_draw.c index 9dc0408fbbea..5b956229c82f 100644 --- a/drivers/gpu/drm/drm_draw.c +++ b/drivers/gpu/drm/drm_draw.c @@ -127,7 +127,7 @@ EXPORT_SYMBOL(drm_draw_fill16); void drm_draw_fill24(struct iosys_map *dmap, unsigned int dpitch, unsigned int height, unsigned int width, - u16 color) + u32 color) { unsigned int y, x; diff --git a/drivers/gpu/drm/drm_draw_internal.h b/drivers/gpu/drm/drm_draw_internal.h index f121ee7339dc..20cb404e23ea 100644 --- a/drivers/gpu/drm/drm_draw_internal.h +++ b/drivers/gpu/drm/drm_draw_internal.h @@ -47,7 +47,7 @@ void drm_draw_fill16(struct iosys_map *dmap, unsigned int dpitch, void drm_draw_fill24(struct iosys_map *dmap, unsigned int dpitch, unsigned int height, unsigned int width, - u16 color); + u32 color); void drm_draw_fill32(struct iosys_map *dmap, unsigned int dpitch, unsigned int height, unsigned int width, -- cgit v1.2.3 From 2616222e423398bb374ffcb5d23dea4ba2c3e524 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Fri, 10 Oct 2025 12:21:42 +0530 Subject: amd-xgbe: Avoid spurious link down messages during interface toggle During interface toggle operations (ifdown/ifup), the driver currently resets the local helper variable 'phy_link' to -1. This causes the link state machine to incorrectly interpret the state as a link change event, resulting in spurious "Link is down" messages being logged when the interface is brought back up. Preserve the phy_link state across interface toggles to avoid treating the -1 sentinel value as a legitimate link state transition. 
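A hedged sketch of why the sentinel reads as a transition (assumed shape of the link check, for illustration only; see xgbe-mdio.c for the real logic):

        /* assumed link-state comparison, not the exact driver code */
        if (pdata->phy_link != pdata->phy.link) {       /* -1 never equals 0 or 1 */
                pdata->phy_link = pdata->phy.link;
                netdev_info(netdev, "Link is %s\n",
                            pdata->phy.link ? "Up" : "Down");
        }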
Fixes: 88131a812b16 ("amd-xgbe: Perform phy connect/disconnect at dev open/stop") Signed-off-by: Raju Rangoju Reviewed-by: Dawid Osuchowski Link: https://patch.msgid.link/20251010065142.1189310-1-Raju.Rangoju@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 1 - drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index f0989aa01855..4dc631af7933 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1080,7 +1080,6 @@ static void xgbe_free_rx_data(struct xgbe_prv_data *pdata) static int xgbe_phy_reset(struct xgbe_prv_data *pdata) { - pdata->phy_link = -1; pdata->phy_speed = SPEED_UNKNOWN; return pdata->phy_if.phy_reset(pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 1a37ec45e650..7675bb98f029 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1555,6 +1555,7 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata) pdata->phy.duplex = DUPLEX_FULL; } + pdata->phy_link = 0; pdata->phy.link = 0; pdata->phy.pause_autoneg = pdata->pause_autoneg; -- cgit v1.2.3 From 7f38a1487555604bc4e210fa7cc9b1bce981c40e Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sun, 12 Oct 2025 07:20:01 -0700 Subject: drm/rockchip: vop2: use correct destination rectangle height check The vop2_plane_atomic_check() function incorrectly checks drm_rect_width(dest) twice instead of verifying both width and height. Fix the second condition to use drm_rect_height(dest) so that invalid destination rectangles with height < 4 are correctly rejected. Fixes: 604be85547ce ("drm/rockchip: Add VOP2 driver") Signed-off-by: Alok Tiwari Reviewed-by: Andy Yan Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20251012142005.660727-1-alok.a.tiwari@oracle.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index b50927a824b4..7ec7bea5e38e 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -1031,7 +1031,7 @@ static int vop2_plane_atomic_check(struct drm_plane *plane, return format; if (drm_rect_width(src) >> 16 < 4 || drm_rect_height(src) >> 16 < 4 || - drm_rect_width(dest) < 4 || drm_rect_width(dest) < 4) { + drm_rect_width(dest) < 4 || drm_rect_height(dest) < 4) { drm_err(vop2->drm, "Invalid size: %dx%d->%dx%d, min size is 4x4\n", drm_rect_width(src) >> 16, drm_rect_height(src) >> 16, drm_rect_width(dest), drm_rect_height(dest)); -- cgit v1.2.3 From 62685ab071de7c39499212bff19f1b5bc0148bc7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 1 Oct 2025 15:24:49 +0200 Subject: uprobe: Move arch_uprobe_optimize right after handlers execution It's less confusing to optimize the uprobe right after handler execution, and before the check for a changed ip register, to avoid situations where a changed ip would skip uprobe optimization.
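The resulting order in handle_swbp(), as the hunk below shows:

        handler_chain(uprobe, regs);
        /* Try to optimize after first hit. */
        arch_uprobe_optimize(&uprobe->arch, bp_vaddr);
        if (instruction_pointer(regs) != bp_vaddr)      /* a handler moved ip */
                goto out;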
Suggested-by: Linus Torvalds Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov --- kernel/events/uprobes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 8709c69118b5..f11ceb8be8c4 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -2765,6 +2765,9 @@ static void handle_swbp(struct pt_regs *regs) handler_chain(uprobe, regs); + /* Try to optimize after first hit. */ + arch_uprobe_optimize(&uprobe->arch, bp_vaddr); + /* * If user decided to take execution elsewhere, it makes little sense * to execute the original instruction, so let's skip it. @@ -2772,9 +2775,6 @@ static void handle_swbp(struct pt_regs *regs) if (instruction_pointer(regs) != bp_vaddr) goto out; - /* Try to optimize after first hit. */ - arch_uprobe_optimize(&uprobe->arch, bp_vaddr); - if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) goto out; -- cgit v1.2.3 From ebfc8542ad62d066771e46c8aa30f5624b89cad8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Oct 2025 10:22:42 +0300 Subject: perf/core: Fix address filter match with backing files It was reported that Intel PT address filters do not work in Docker containers. That relates to the use of overlayfs. overlayfs records the backing file in struct vm_area_struct vm_file, instead of the user file that the user mmapped. In order for an address filter to match, it must compare to the user file inode. There is an existing helper file_user_inode() for that situation. Use file_user_inode() instead of file_inode() to get the inode for address filter matching. Example: Setup: # cd /root # mkdir test ; cd test ; mkdir lower upper work merged # cp `which cat` lower # mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged # perf record --buildid-mmap -e intel_pt//u --filter 'filter * @ /root/test/merged/cat' -- /root/test/merged/cat /proc/self/maps ... 55d61d246000-55d61d2e1000 r-xp 00018000 00:1a 3418 /root/test/merged/cat ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.015 MB perf.data ] # perf buildid-cache --add /root/test/merged/cat Before: Address filter does not match so there are no control flow packets # perf script --itrace=e # perf script --itrace=b | wc -l 0 # perf script -D | grep 'TIP.PGE' | wc -l 0 # After: Address filter does match so there are control flow packets # perf script --itrace=e # perf script --itrace=b | wc -l 235 # perf script -D | grep 'TIP.PGE' | wc -l 57 # With respect to stable kernels, overlayfs mmap function ovl_mmap() was added in v4.19 but file_user_inode() was not added until v6.8 and never back-ported to stable kernels. FMODE_BACKING that it depends on was added in v6.5. This issue has gone largely unnoticed, so back-porting before v6.8 is probably not worth it, so put 6.8 as the stable kernel prerequisite version, although in practice the next long term kernel is 6.12. 
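For reference, a paraphrased sketch of the helper (modelled on include/linux/fs.h in v6.8+; check the tree for the exact definition): it falls back to file_inode() unless the file is a backing file:

        static inline struct inode *file_user_inode(struct file *f)
        {
                if (unlikely(f->f_mode & FMODE_BACKING))        /* e.g. overlayfs */
                        return d_inode(backing_file_user_path(f)->dentry);
                return file_inode(f);
        }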
Closes: https://lore.kernel.org/linux-perf-users/aBCwoq7w8ohBRQCh@fremen.lan Reported-by: Edd Barrett Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Amir Goldstein Cc: stable@vger.kernel.org # 6.8 --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7541f6f85fcb..cd63ec84e386 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9492,7 +9492,7 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter, if (!filter->path.dentry) return false; - if (d_inode(filter->path.dentry) != file_inode(file)) + if (d_inode(filter->path.dentry) != file_user_inode(file)) return false; if (filter->offset > offset + size) -- cgit v1.2.3 From 8818f507a9391019a3ec7c57b1a32e4b386e48a5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Oct 2025 10:22:43 +0300 Subject: perf/core: Fix MMAP event path names with backing files Some file systems like FUSE-based ones or overlayfs may record the backing file in struct vm_area_struct vm_file, instead of the user file that the user mmapped. Since commit def3ae83da02f ("fs: store real path instead of fake path in backing file f_path"), file_path() no longer returns the user file path when applied to a backing file. There is an existing helper file_user_path() for that situation. Use file_user_path() instead of file_path() to get the path for MMAP and MMAP2 events. Example: Setup: # cd /root # mkdir test ; cd test ; mkdir lower upper work merged # cp `which cat` lower # mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged # perf record -e intel_pt//u -- /root/test/merged/cat /proc/self/maps ... 55b0ba399000-55b0ba434000 r-xp 00018000 00:1a 3419 /root/test/merged/cat ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.060 MB perf.data ] # Before: File name is wrong (/cat), so decoding fails: # perf script --no-itrace --show-mmap-events cat 367 [016] 100.491492: PERF_RECORD_MMAP2 367/367: [0x55b0ba399000(0x9b000) @ 0x18000 00:02 3419 489959280]: r-xp /cat ... # perf script --itrace=e | wc -l Warning: 19 instruction trace errors 19 # After: File name is correct (/root/test/merged/cat), so decoding is ok: # perf script --no-itrace --show-mmap-events cat 364 [016] 72.153006: PERF_RECORD_MMAP2 364/364: [0x55ce4003d000(0x9b000) @ 0x18000 00:02 3419 3132534314]: r-xp /root/test/merged/cat # perf script --itrace=e # perf script --itrace=e | wc -l 0 # Fixes: def3ae83da02f ("fs: store real path instead of fake path in backing file f_path") Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Amir Goldstein Cc: stable@vger.kernel.org --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index cd63ec84e386..7b5c2373a8d7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9416,7 +9416,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) * need to add enough zero bytes after the string to handle * the 64bit alignment we do later. 
*/ - name = file_path(file, buf, PATH_MAX - sizeof(u64)); + name = d_path(file_user_path(file), buf, PATH_MAX - sizeof(u64)); if (IS_ERR(name)) { name = "//toolong"; goto cpy_name; -- cgit v1.2.3 From fa4f4bae893fbce8a3edfff1ab7ece0c01dc1328 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Oct 2025 10:22:44 +0300 Subject: perf/core: Fix MMAP2 event device with backing files Some file systems like FUSE-based ones or overlayfs may record the backing file in struct vm_area_struct vm_file, instead of the user file that the user mmapped. That causes perf to misreport the device major/minor numbers of the file system of the file, and the generation of the file, and potentially other inode details. There is an existing helper file_user_inode() for that situation. Use file_user_inode() instead of file_inode() to get the inode for MMAP2 events. Example: Setup: # cd /root # mkdir test ; cd test ; mkdir lower upper work merged # cp `which cat` lower # mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged # perf record -e cycles:u -- /root/test/merged/cat /proc/self/maps ... 55b2c91d0000-55b2c926b000 r-xp 00018000 00:1a 3419 /root/test/merged/cat ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.004 MB perf.data (5 samples) ] # # stat /root/test/merged/cat File: /root/test/merged/cat Size: 1127792 Blocks: 2208 IO Block: 4096 regular file Device: 0,26 Inode: 3419 Links: 1 Access: (0755/-rwxr-xr-x) Uid: ( 0/ root) Gid: ( 0/ root) Access: 2025-09-08 12:23:59.453309624 +0000 Modify: 2025-09-08 12:23:59.454309624 +0000 Change: 2025-09-08 12:23:59.454309624 +0000 Birth: 2025-09-08 12:23:59.453309624 +0000 Before: Device reported 00:02 differs from stat output and /proc/self/maps # perf script --show-mmap-events | grep /root/test/merged/cat cat 377 [-01] 243.078558: PERF_RECORD_MMAP2 377/377: [0x55b2c91d0000(0x9b000) @ 0x18000 00:02 3419 2068525940]: r-xp /root/test/merged/cat After: Device reported 00:1a is the same as stat output and /proc/self/maps # perf script --show-mmap-events | grep /root/test/merged/cat cat 362 [-01] 127.755167: PERF_RECORD_MMAP2 362/362: [0x55ba6e781000(0x9b000) @ 0x18000 00:1a 3419 0]: r-xp /root/test/merged/cat With respect to stable kernels, overlayfs mmap function ovl_mmap() was added in v4.19 but file_user_inode() was not added until v6.8 and never back-ported to stable kernels. FMODE_BACKING that it depends on was added in v6.5. This issue has gone largely unnoticed, so back-porting before v6.8 is probably not worth it, so put 6.8 as the stable kernel prerequisite version, although in practice the next long term kernel is 6.12. 
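For illustration, the misreported fields all come from the inode chosen here (abridged from the hunk below); with the backing inode they describe the backing file's filesystem (00:02 in the example above) rather than the overlay (00:1a):

        inode = file_user_inode(vma->vm_file);  /* user-visible inode, not the backing one */
        dev = inode->i_sb->s_dev;               /* reported as MAJOR:MINOR in MMAP2 */
        ino = inode->i_ino;
        gen = inode->i_generation;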
Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Amir Goldstein Cc: stable@vger.kernel.org # 6.8 --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7b5c2373a8d7..177e57c1a362 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9403,7 +9403,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) flags |= MAP_HUGETLB; if (file) { - struct inode *inode; + const struct inode *inode; dev_t dev; buf = kmalloc(PATH_MAX, GFP_KERNEL); @@ -9421,7 +9421,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) name = "//toolong"; goto cpy_name; } - inode = file_inode(vma->vm_file); + inode = file_user_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; gen = inode->i_generation; -- cgit v1.2.3 From 6c26c055523d915afb8d18e7277848eff66a3085 Mon Sep 17 00:00:00 2001 From: Xinpeng Sun Date: Thu, 9 Oct 2025 11:31:08 +0800 Subject: HID: intel-thc-hid: intel-quicki2c: Fix wrong type casting The type definition of qcdev->i2c_max_frame_size is already u32, so remove the unnecessary type casting le16_to_cpu. Signed-off-by: Xinpeng Sun Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202509280841.pxmgBzKW-lkp@intel.com/ Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c index 8433a991e7f4..0156ab391778 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c @@ -466,7 +466,7 @@ static void quicki2c_dma_adv_enable(struct quicki2c_device *qcdev) dev_warn(qcdev->dev, "Max frame size is smaller than hid max input length!"); thc_i2c_set_rx_max_size(qcdev->thc_hw, - le16_to_cpu(qcdev->i2c_max_frame_size)); + qcdev->i2c_max_frame_size); } thc_i2c_rx_max_size_enable(qcdev->thc_hw, true); } -- cgit v1.2.3 From 8fe2cd8ec84b3592b57f40b080f9d5aeebd553af Mon Sep 17 00:00:00 2001 From: Even Xu Date: Fri, 19 Sep 2025 15:09:39 +0800 Subject: HID: intel-thc-hid: Intel-quickspi: switch first interrupt from level to edge detection The original implementation used level detection for the first interrupt after device reset to avoid potential interrupt line noise and missed interrupts during the initialization phase. However, this approach introduced unintended side effects when tested with certain touch panels, including: - Delayed hardware interrupt response - Multiple spurious interrupt triggers Switching back to edge detection for the first interrupt resolves these issues while maintaining reliable interrupt handling. Extensive testing across multiple platforms with touch panels from various vendors confirms this change introduces no regressions. 
[jkosina@suse.com: properly capitalize shortlog] Fixes: 9d8d51735a3a ("HID: intel-thc-hid: intel-quickspi: Add HIDSPI protocol implementation") Tested-by: Rui Zhang Signed-off-by: Even Xu Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c index e6ba2ddcc9cb..16f780bc879b 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c @@ -280,8 +280,7 @@ int reset_tic(struct quickspi_device *qsdev) qsdev->reset_ack = false; - /* First interrupt uses level trigger to avoid missing interrupt */ - thc_int_trigger_type_select(qsdev->thc_hw, false); + thc_int_trigger_type_select(qsdev->thc_hw, true); ret = acpi_tic_reset(qsdev); if (ret) -- cgit v1.2.3 From 50f1f782f8d621a90108340c632bcb6ab4307d2e Mon Sep 17 00:00:00 2001 From: Abhishek Tamboli Date: Wed, 24 Sep 2025 10:07:20 +0530 Subject: HID: intel-thc-hid: intel-quickspi: Add ARL PCI Device Id's Add the missing PCI ID for the quickspi device used on the Lenovo Yoga Pro 9i 16IAH10. Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=220567 Signed-off-by: Abhishek Tamboli Reviewed-by: Even Xu Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c | 6 ++++++ drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c index 84314989dc53..14cabd5dc6dd 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c @@ -33,6 +33,10 @@ struct quickspi_driver_data ptl = { .max_packet_size_value = MAX_PACKET_SIZE_VALUE_LNL, }; +struct quickspi_driver_data arl = { + .max_packet_size_value = MAX_PACKET_SIZE_VALUE_MTL, +}; + /* THC QuickSPI ACPI method to get device properties */ /* HIDSPI Method: {6e2ac436-0fcf-41af-a265-b32a220dcfab} */ static guid_t hidspi_guid = @@ -978,6 +982,8 @@ static const struct pci_device_id quickspi_pci_tbl[] = { {PCI_DEVICE_DATA(INTEL, THC_PTL_U_DEVICE_ID_SPI_PORT2, &ptl), }, {PCI_DEVICE_DATA(INTEL, THC_WCL_DEVICE_ID_SPI_PORT1, &ptl), }, {PCI_DEVICE_DATA(INTEL, THC_WCL_DEVICE_ID_SPI_PORT2, &ptl), }, + {PCI_DEVICE_DATA(INTEL, THC_ARL_DEVICE_ID_SPI_PORT1, &arl), }, + {PCI_DEVICE_DATA(INTEL, THC_ARL_DEVICE_ID_SPI_PORT2, &arl), }, {} }; MODULE_DEVICE_TABLE(pci, quickspi_pci_tbl); diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h index f3532d866749..c30e1a42eb09 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h @@ -21,6 +21,8 @@ #define PCI_DEVICE_ID_INTEL_THC_PTL_U_DEVICE_ID_SPI_PORT2 0xE44B #define PCI_DEVICE_ID_INTEL_THC_WCL_DEVICE_ID_SPI_PORT1 0x4D49 #define PCI_DEVICE_ID_INTEL_THC_WCL_DEVICE_ID_SPI_PORT2 0x4D4B +#define PCI_DEVICE_ID_INTEL_THC_ARL_DEVICE_ID_SPI_PORT1 0x7749 +#define PCI_DEVICE_ID_INTEL_THC_ARL_DEVICE_ID_SPI_PORT2 0x774B /* HIDSPI special ACPI parameters DSM methods */ #define ACPI_QUICKSPI_REVISION_NUM 2 -- cgit v1.2.3 From 362f21536966d7039da1de762f28f4ad44565acc Mon Sep 17 00:00:00 2001 From: Deepak Sharma Date: Fri, 26 Sep 2025 20:28:11 +0530 Subject: HID: cp2112: Add parameter validation 
to data length Syzkaller reported a stack OOB access in cp2112_write_req, caused by lack of parameter validation for the user input in the I2C SMBUS ioctl in the cp2112 driver. Add parameter validation so that data->block[0] is bounded by I2C_SMBUS_BLOCK_MAX plus the additional compatibility padding. [jkosina@suse.com: fix whitespace damage] Reported-by: syzbot+7617e19c8a59edfbd879@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7617e19c8a59edfbd879 Tested-by: syzbot+7617e19c8a59edfbd879@syzkaller.appspotmail.com Signed-off-by: Deepak Sharma Signed-off-by: Jiri Kosina --- drivers/hid/hid-cp2112.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 5a95ea3bec98..803b883ae875 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -689,7 +689,14 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, count = cp2112_write_read_req(buf, addr, read_length, command, NULL, 0); } else { - count = cp2112_write_req(buf, addr, command, + /* Copy starts from data->block[1] so the length can + * be at max I2C_SMBUS_CLOCK_MAX + 1 + */ + + if (data->block[0] > I2C_SMBUS_BLOCK_MAX + 1) + count = -EINVAL; + else + count = cp2112_write_req(buf, addr, command, data->block + 1, data->block[0]); } @@ -700,7 +707,14 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, I2C_SMBUS_BLOCK_MAX, command, NULL, 0); } else { - count = cp2112_write_req(buf, addr, command, + /* data_length here is data->block[0] + 1 + * so make sure that the data->block[0] is + * less than or equals I2C_SMBUS_BLOCK_MAX + 1 + */ + if (data->block[0] > I2C_SMBUS_BLOCK_MAX + 1) + count = -EINVAL; + else + count = cp2112_write_req(buf, addr, command, data->block, data->block[0] + 1); } @@ -709,7 +723,14 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, size = I2C_SMBUS_BLOCK_DATA; read_write = I2C_SMBUS_READ; - count = cp2112_write_read_req(buf, addr, I2C_SMBUS_BLOCK_MAX, + /* data_length is data->block[0] + 1, so + * so data->block[0] should be less than or + * equal to the I2C_SMBUS_BLOCK_MAX + 1 + */ + if (data->block[0] > I2C_SMBUS_BLOCK_MAX + 1) + count = -EINVAL; + else + count = cp2112_write_read_req(buf, addr, I2C_SMBUS_BLOCK_MAX, command, data->block, data->block[0] + 1); break; -- cgit v1.2.3 From c5705a2a4aa35350e504b72a94b5c71c3754833c Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 10 Oct 2025 13:42:39 -0700 Subject: Octeontx2-af: Fix missing error code in cgx_probe() When CGX fails mapping to NIX, set the error code to -ENODEV; currently err is zero, which is treated as the success path.
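A condensed sketch of the fixed error path (abridged from the hunk below):

        if (!is_cgx_mapped_to_nix(pdev->subsystem_device, cgx->cgx_id)) {
                dev_notice(dev, "CGX %d not mapped to NIX, skipping probe\n",
                           cgx->cgx_id);
                err = -ENODEV;  /* without this, err stays 0 and probe "succeeds" */
                goto err_release_regions;
        }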
Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aLAdlCg2_Yv7Y-3h@stanley.mountain/ Fixes: d280233fc866 ("Octeontx2-af: Fix NIX X2P calibration failures") Signed-off-by: Harshit Mogalapalli Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251010204239.94237-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index d374a4454836..ec0e11c77cbf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1981,6 +1981,7 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) !is_cgx_mapped_to_nix(pdev->subsystem_device, cgx->cgx_id)) { dev_notice(dev, "CGX %d not mapped to NIX, skipping probe\n", cgx->cgx_id); + err = -ENODEV; goto err_release_regions; } -- cgit v1.2.3 From 0be4253bf878d9aaa2b96031ac8683fceeb81480 Mon Sep 17 00:00:00 2001 From: Tristan Lobb Date: Sun, 28 Sep 2025 18:25:43 +0200 Subject: HID: quirks: avoid Cooler Master MM712 dongle wakeup bug The Cooler Master Mice Dongle includes a vendor defined HID interface alongside its mouse interface. Not polling it will cause the mouse to stop responding to polls on any interface once woken up again after going into power saving mode. Add the HID_QUIRK_ALWAYS_POLL quirk alongside the Cooler Master VID and the Dongle's PID. Signed-off-by: Tristan Lobb Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-quirks.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 5721b8414bbd..d05a62bbafff 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -342,6 +342,9 @@ #define USB_DEVICE_ID_CODEMERCS_IOW_FIRST 0x1500 #define USB_DEVICE_ID_CODEMERCS_IOW_LAST 0x15ff +#define USB_VENDOR_ID_COOLER_MASTER 0x2516 +#define USB_DEVICE_ID_COOLER_MASTER_MICE_DONGLE 0x01b7 + #define USB_VENDOR_ID_CORSAIR 0x1b1c #define USB_DEVICE_ID_CORSAIR_K90 0x1b02 #define USB_DEVICE_ID_CORSAIR_K70R 0x1b09 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index ffd034566e2e..d7105a839598 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -57,6 +57,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_THROTTLE), HID_QUIRK_NOGET }, + { HID_USB_DEVICE(USB_VENDOR_ID_COOLER_MASTER, USB_DEVICE_ID_COOLER_MASTER_MICE_DONGLE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB), HID_QUIRK_NO_INIT_REPORTS }, -- cgit v1.2.3 From 1141ed52348d3df82d3fd2316128b3fc6203a68c Mon Sep 17 00:00:00 2001 From: Oleg Makarenko Date: Mon, 29 Sep 2025 18:46:11 +0300 Subject: HID: quirks: Add ALWAYS_POLL quirk for VRS R295 steering wheel This patch adds ALWAYS_POLL quirk for the VRS R295 steering wheel joystick. This device reboots itself every 8-10 seconds if it is not polled. 
Signed-off-by: Oleg Makarenko Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index d05a62bbafff..0723b4b1c9ec 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1435,6 +1435,7 @@ #define USB_VENDOR_ID_VRS 0x0483 #define USB_DEVICE_ID_VRS_DFP 0xa355 +#define USB_DEVICE_ID_VRS_R295 0xa44c #define USB_VENDOR_ID_VTL 0x0306 #define USB_DEVICE_ID_VTL_MULTITOUCH_FF3F 0xff3f diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index d7105a839598..bcd4bccf1a7c 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -207,6 +207,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWA60), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_WP5540), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_VRS, USB_DEVICE_ID_VRS_R295), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET), HID_QUIRK_MULTI_INPUT }, -- cgit v1.2.3 From 327cd4b68b4398b6c24f10eb2b2533ffbfc10185 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Sat, 11 Oct 2025 15:05:18 +0800 Subject: usbnet: Fix using smp_processor_id() in preemptible code warnings Syzbot reported the following warning: BUG: using smp_processor_id() in preemptible [00000000] code: dhcpcd/2879 caller is usbnet_skb_return+0x74/0x490 drivers/net/usb/usbnet.c:331 CPU: 1 UID: 0 PID: 2879 Comm: dhcpcd Not tainted 6.15.0-rc4-syzkaller-00098-g615dca38c2ea #0 PREEMPT(voluntary) Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x16c/0x1f0 lib/dump_stack.c:120 check_preemption_disabled+0xd0/0xe0 lib/smp_processor_id.c:49 usbnet_skb_return+0x74/0x490 drivers/net/usb/usbnet.c:331 usbnet_resume_rx+0x4b/0x170 drivers/net/usb/usbnet.c:708 usbnet_change_mtu+0x1be/0x220 drivers/net/usb/usbnet.c:417 __dev_set_mtu net/core/dev.c:9443 [inline] netif_set_mtu_ext+0x369/0x5c0 net/core/dev.c:9496 netif_set_mtu+0xb0/0x160 net/core/dev.c:9520 dev_set_mtu+0xae/0x170 net/core/dev_api.c:247 dev_ifsioc+0xa31/0x18d0 net/core/dev_ioctl.c:572 dev_ioctl+0x223/0x10e0 net/core/dev_ioctl.c:821 sock_do_ioctl+0x19d/0x280 net/socket.c:1204 sock_ioctl+0x42f/0x6a0 net/socket.c:1311 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl fs/ioctl.c:892 [inline] __x64_sys_ioctl+0x190/0x200 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xcd/0x260 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f For historical and portability reasons, netif_rx() is usually run in softirq or interrupt context; this commit therefore adds local_bh_disable/enable() protection in usbnet_resume_rx().
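The resulting shape of usbnet_resume_rx(), abridged from the hunk below:

        local_bh_disable();     /* usbnet_skb_return() ends in netif_rx(), which expects BH context */
        clear_bit(EVENT_RX_PAUSED, &dev->flags);
        while ((skb = skb_dequeue(&dev->rxq_pause)) != NULL)
                usbnet_skb_return(dev, skb);
        queue_work(system_bh_wq, &dev->bh_work);
        local_bh_enable();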
Fixes: 43daa96b166c ("usbnet: Stop RX Q on MTU change") Link: https://syzkaller.appspot.com/bug?id=81f55dfa587ee544baaaa5a359a060512228c1e1 Suggested-by: Jakub Kicinski Signed-off-by: Zqiang Link: https://patch.msgid.link/20251011070518.7095-1-qiang.zhang@linux.dev Signed-off-by: Paolo Abeni --- drivers/net/usb/usbnet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 511c4154cf74..bf01f2728531 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -702,6 +702,7 @@ void usbnet_resume_rx(struct usbnet *dev) struct sk_buff *skb; int num = 0; + local_bh_disable(); clear_bit(EVENT_RX_PAUSED, &dev->flags); while ((skb = skb_dequeue(&dev->rxq_pause)) != NULL) { @@ -710,6 +711,7 @@ void usbnet_resume_rx(struct usbnet *dev) } queue_work(system_bh_wq, &dev->bh_work); + local_bh_enable(); netif_dbg(dev, rx_status, dev->net, "paused rx queue disabled, %d skbs requeued\n", num); -- cgit v1.2.3 From 1d64624243af8329b4b219d8c39e28ea448f9929 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Mon, 6 Oct 2025 18:05:31 -0700 Subject: HID: core: Add printk_ratelimited variants to hid_warn() etc hid_warn_ratelimited() is needed. Add the others as part of the block. Signed-off-by: Vicki Pfau Signed-off-by: Jiri Kosina --- include/linux/hid.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/hid.h b/include/linux/hid.h index e1b673ad7457..a4ddb94e3ee5 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1292,4 +1292,15 @@ void hid_quirks_exit(__u16 bus); #define hid_dbg_once(hid, fmt, ...) \ dev_dbg_once(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_err_ratelimited(hid, fmt, ...) \ + dev_err_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_notice_ratelimited(hid, fmt, ...) \ + dev_notice_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_warn_ratelimited(hid, fmt, ...) \ + dev_warn_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_info_ratelimited(hid, fmt, ...) \ + dev_info_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_dbg_ratelimited(hid, fmt, ...) \ + dev_dbg_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) + #endif -- cgit v1.2.3 From b73bc6a51f0c0066912c7e181acee41091c70fe6 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Mon, 6 Oct 2025 18:05:32 -0700 Subject: HID: nintendo: Wait longer for initial probe Some third-party controllers, such as the PB Tails CHOC, won't always respond quickly on startup. Since this packet is needed for probe, and only once during probe, let's just wait an extra second, which makes connecting consistent. Signed-off-by: Vicki Pfau Signed-off-by: Jiri Kosina --- drivers/hid/hid-nintendo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index fb4985988615..e3e54f1df44f 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -2420,7 +2420,7 @@ static int joycon_read_info(struct joycon_ctlr *ctlr) struct joycon_input_report *report; req.subcmd_id = JC_SUBCMD_REQ_DEV_INFO; - ret = joycon_send_subcmd(ctlr, &req, 0, HZ); + ret = joycon_send_subcmd(ctlr, &req, 0, 2 * HZ); if (ret) { hid_err(ctlr->hdev, "Failed to get joycon info; ret=%d\n", ret); return ret; -- cgit v1.2.3 From b8874720b2f33a06ff1d4cf3827e7ec1195cb360 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Mon, 6 Oct 2025 18:05:33 -0700 Subject: HID: nintendo: Rate limit IMU compensation message Some controllers are very bad at updating the IMU, leading to these messages spamming the syslog. 
Rate-limiting them helps with this a bit. Signed-off-by: Vicki Pfau Signed-off-by: Jiri Kosina --- drivers/hid/hid-nintendo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index e3e54f1df44f..c2849a541f65 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -1455,10 +1455,10 @@ static void joycon_parse_imu_report(struct joycon_ctlr *ctlr, ctlr->imu_avg_delta_ms; ctlr->imu_timestamp_us += 1000 * ctlr->imu_avg_delta_ms; if (dropped_pkts > JC_IMU_DROPPED_PKT_WARNING) { - hid_warn(ctlr->hdev, + hid_warn_ratelimited(ctlr->hdev, "compensating for %u dropped IMU reports\n", dropped_pkts); - hid_warn(ctlr->hdev, + hid_warn_ratelimited(ctlr->hdev, "delta=%u avg_delta=%u\n", delta, ctlr->imu_avg_delta_ms); } -- cgit v1.2.3 From 75527d61d60d493d1eb064f335071a20ca581f54 Mon Sep 17 00:00:00 2001 From: Yi Cong Date: Sat, 11 Oct 2025 16:24:15 +0800 Subject: r8152: add error handling in rtl8152_driver_init rtl8152_driver_init() is missing the error handling. When rtl8152_driver registration fails, rtl8152_cfgselector_driver should be deregistered. Fixes: ec51fbd1b8a2 ("r8152: add USB device driver for config selection") Cc: stable@vger.kernel.org Signed-off-by: Yi Cong Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251011082415.580740-1-yicongsrfy@163.com [pabeni@redhat.com: clarified the commit message] Signed-off-by: Paolo Abeni --- drivers/net/usb/r8152.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 44cba7acfe7d..a22d4bb2cf3b 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10122,7 +10122,12 @@ static int __init rtl8152_driver_init(void) ret = usb_register_device_driver(&rtl8152_cfgselector_driver, THIS_MODULE); if (ret) return ret; - return usb_register(&rtl8152_driver); + + ret = usb_register(&rtl8152_driver); + if (ret) + usb_deregister_device_driver(&rtl8152_cfgselector_driver); + + return ret; } static void __exit rtl8152_driver_exit(void) -- cgit v1.2.3 From 083a4f3f3cc7d107728c8f297e4f6276f0876b2d Mon Sep 17 00:00:00 2001 From: Jonathan Denose Date: Mon, 13 Oct 2025 20:54:57 +0000 Subject: HID: Kconfig: Fix build error from CONFIG_HID_HAPTIC Temporarily change CONFIG_HID_HAPTIC to be bool instead of tristate, until we implement a permanent solution. Recently the CONFIG_HID_HAPTIC Kconfig option was reported as causing the following build errors: MODPOST Module.symvers ERROR: modpost: "hid_haptic_init" [drivers/hid/hid-multitouch.ko] undefined! ERROR: modpost: "hid_haptic_pressure_increase" [drivers/hid/hid-multitouch.ko] undefined! ERROR: modpost: "hid_haptic_check_pressure_unit" [drivers/hid/hid-multitouch.ko] undefined! ERROR: modpost: "hid_haptic_input_configured" [drivers/hid/hid-multitouch.ko] undefined! ERROR: modpost: "hid_haptic_input_mapping" [drivers/hid/hid-multitouch.ko] undefined! ERROR: modpost: "hid_haptic_feature_mapping" [drivers/hid/hid-multitouch.ko] undefined! ERROR: modpost: "hid_haptic_pressure_reset" [drivers/hid/hid-multitouch.ko] undefined! make[3]: *** [/home/thl/var/linux.dev/scripts/Makefile.modpost:147: Module.symvers] Error 1 when the kernel is compiled with the following configuration: CONFIG_HID=y CONFIG_HID_MULTITOUCH=m CONFIG_HID_HAPTIC=m To resolve this, temporarily change the CONFIG_HID_HAPTIC option to be bool, until we arrive at a permanent solution to enable CONFIG_HID_HAPTIC to be tristate. For a more detailed discussion, see [1]. 
[1]: https://lore.kernel.org/linux-input/auypydfkhx2eg7vp764way4batdilzc35inqda3exwzs3tk3ff@oagat6g46zto/ Signed-off-by: Jonathan Denose Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 7ff85c7200e5..986de05a9787 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -93,7 +93,7 @@ config HID_GENERIC If unsure, say Y. config HID_HAPTIC - tristate "Haptic touchpad support" + bool "Haptic touchpad support" default n help Support for touchpads with force sensors and haptic actuators instead of a -- cgit v1.2.3 From 295ce1eb36ae47dc862d6c8a1012618a25516208 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 11 Oct 2025 11:57:42 +0000 Subject: tcp: fix tcp_tso_should_defer() vs large RTT Neal reported that using neper tcp_stream with TCP_TX_DELAY set to 50ms would often lead to flows stuck in a small cwnd mode, regardless of the congestion control. While tcp_stream sets TCP_TX_DELAY too late after the connect(), it highlighted two kernel bugs. The following heuristic in tcp_tso_should_defer() seems wrong for large RTT: delta = tp->tcp_clock_cache - head->tstamp; /* If next ACK is likely to come too late (half srtt), do not defer */ if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0) goto send_now; If next ACK is expected to come in more than 1 ms, we should not defer because we prefer a smooth ACK clocking. While blamed commit was a step in the good direction, it was not generic enough. Another patch fixing TCP_TX_DELAY for established flows will be proposed when net-next reopens. Fixes: 50c8339e9299 ("tcp: tso: restore IW10 after TSO autosizing") Reported-by: Neal Cardwell Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Tested-by: Neal Cardwell Link: https://patch.msgid.link/20251011115742.1245771-1-edumazet@google.com [pabeni@redhat.com: fixed whitespace issue] Signed-off-by: Paolo Abeni --- net/ipv4/tcp_output.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bb3576ac0ad7..b94efb3050d2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2369,7 +2369,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, u32 max_segs) { const struct inet_connection_sock *icsk = inet_csk(sk); - u32 send_win, cong_win, limit, in_flight; + u32 send_win, cong_win, limit, in_flight, threshold; + u64 srtt_in_ns, expected_ack, how_far_is_the_ack; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *head; int win_divisor; @@ -2431,9 +2432,19 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, head = tcp_rtx_queue_head(sk); if (!head) goto send_now; - delta = tp->tcp_clock_cache - head->tstamp; - /* If next ACK is likely to come too late (half srtt), do not defer */ - if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0) + + srtt_in_ns = (u64)(NSEC_PER_USEC >> 3) * tp->srtt_us; + /* When is the ACK expected ? */ + expected_ack = head->tstamp + srtt_in_ns; + /* How far from now is the ACK expected ? */ + how_far_is_the_ack = expected_ack - tp->tcp_clock_cache; + + /* If next ACK is likely to come too late, + * ie in more than min(1ms, half srtt), do not defer. + */ + threshold = min(srtt_in_ns >> 1, NSEC_PER_MSEC); + + if ((s64)(how_far_is_the_ack - threshold) > 0) goto send_now; /* Ok, it looks like it is advisable to defer. 
-- cgit v1.2.3 From bd5afca115f181c85f992d42a57cd497bc823ccb Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 12 Oct 2025 11:19:44 +0200 Subject: net: airoha: Take into account out-of-order tx completions in airoha_dev_xmit() Completion napi can free tx descriptors out of order if hw QoS is enabled and packets with different priorities are queued to the same DMA ring. Take possible out-of-order reports into account by checking whether the tx queue is full using the circular buffer head/tail pointers instead of the number of queued packets. Fixes: 23020f0493270 ("net: airoha: Introduce ethernet support for EN7581 SoC") Suggested-by: Simon Horman Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251012-airoha-tx-busy-queue-v2-1-a600b08bab2d@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 833dd911980b..433a646e9831 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1873,6 +1873,20 @@ static u32 airoha_get_dsa_tag(struct sk_buff *skb, struct net_device *dev) #endif } +static bool airoha_dev_tx_queue_busy(struct airoha_queue *q, u32 nr_frags) +{ + u32 tail = q->tail <= q->head ? q->tail + q->ndesc : q->tail; + u32 index = q->head + nr_frags; + + /* completion napi can free out-of-order tx descriptors if hw QoS is + * enabled and packets with different priorities are queued to the same + * DMA ring. Take into account possible out-of-order reports checking + * if the tx queue is full using circular buffer head/tail pointers + * instead of the number of queued packets. + */ + return index >= tail; +} + static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -1926,7 +1940,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, txq = netdev_get_tx_queue(dev, qid); nr_frags = 1 + skb_shinfo(skb)->nr_frags; - if (q->queued + nr_frags > q->ndesc) { + if (airoha_dev_tx_queue_busy(q, nr_frags)) { /* not enough space in the queue */ netif_tx_stop_queue(txq); spin_unlock_bh(&q->lock); -- cgit v1.2.3 From bb3d208250424ef25d34f2f05f18f094e5cebfa5 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Thu, 9 Oct 2025 19:08:44 +0200 Subject: drm/xe/ct: Separate waiting for retry from ct send function The function `guc_ct_send_locked()` is really quite simple, but still looks complex due to exposed internals. It sends a message and, in case of lack of space, waits for a proper moment to send a retry. Clear separation of the send function and the wait function will help with readability. This is a cosmetic change only; no functional difference is expected. This patch introduces `guc_ct_send_wait_for_retry()`, and uses it to greatly simplify `guc_ct_send_locked()`.
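The simplified send loop, abridged from the hunk below:

try_again:
        ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
        if (unlikely(ret == -EBUSY)) {
                if (!guc_ct_send_wait_for_retry(ct, len, g2h_len, g2h_fence,
                                                &sleep_period_ms))
                        goto broken;
                goto try_again;
        }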
Signed-off-by: Tomasz Lis Reviewed-by: Matthew Brost Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251009170844.178199-1-tomasz.lis@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 64 +++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 3ae1e8db143a..e68953ef3a00 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -991,22 +991,15 @@ static void kick_reset(struct xe_guc_ct *ct) static int dequeue_one_g2h(struct xe_guc_ct *ct); -static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, - u32 g2h_len, u32 num_g2h, - struct g2h_fence *g2h_fence) +/* + * wait before retry of sending h2g message + * Return: true if ready for retry, false if the wait timeouted + */ +static bool guc_ct_send_wait_for_retry(struct xe_guc_ct *ct, u32 len, + u32 g2h_len, struct g2h_fence *g2h_fence, + unsigned int *sleep_period_ms) { struct xe_device *xe = ct_to_xe(ct); - struct xe_gt *gt = ct_to_gt(ct); - unsigned int sleep_period_ms = 1; - int ret; - - xe_gt_assert(gt, !g2h_len || !g2h_fence); - lockdep_assert_held(&ct->lock); - xe_device_assert_mem_access(ct_to_xe(ct)); - -try_again: - ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, - g2h_fence); /* * We wait to try to restore credits for about 1 second before bailing. @@ -1015,24 +1008,22 @@ try_again: * the case of G2H we process any G2H in the channel, hopefully freeing * credits as we consume the G2H messages. */ - if (unlikely(ret == -EBUSY && - !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) { + if (!h2g_has_room(ct, len + GUC_CTB_HDR_LEN)) { struct guc_ctb *h2g = &ct->ctbs.h2g; - if (sleep_period_ms == 1024) - goto broken; + if (*sleep_period_ms == 1024) + return false; trace_xe_guc_ct_h2g_flow_control(xe, h2g->info.head, h2g->info.tail, h2g->info.size, h2g->info.space, len + GUC_CTB_HDR_LEN); - msleep(sleep_period_ms); - sleep_period_ms <<= 1; - - goto try_again; - } else if (unlikely(ret == -EBUSY)) { + msleep(*sleep_period_ms); + *sleep_period_ms <<= 1; + } else { struct xe_device *xe = ct_to_xe(ct); struct guc_ctb *g2h = &ct->ctbs.g2h; + int ret; trace_xe_guc_ct_g2h_flow_control(xe, g2h->info.head, desc_read(xe, g2h, tail), @@ -1046,7 +1037,7 @@ try_again: (desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.info.head) if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding || g2h_avail(ct), HZ)) - goto broken; + return false; #undef g2h_avail ret = dequeue_one_g2h(ct); @@ -1054,9 +1045,32 @@ try_again: if (ret != -ECANCELED) xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)", ERR_PTR(ret)); - goto broken; + return false; } + } + return true; +} + +static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h, + struct g2h_fence *g2h_fence) +{ + struct xe_gt *gt = ct_to_gt(ct); + unsigned int sleep_period_ms = 1; + int ret; + + xe_gt_assert(gt, !g2h_len || !g2h_fence); + lockdep_assert_held(&ct->lock); + xe_device_assert_mem_access(ct_to_xe(ct)); +try_again: + ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, + g2h_fence); + + if (unlikely(ret == -EBUSY)) { + if (!guc_ct_send_wait_for_retry(ct, len, g2h_len, g2h_fence, + &sleep_period_ms)) + goto broken; goto try_again; } -- cgit v1.2.3 From a7cdc2086c19e435d4cec3f9393b5f46899c0468 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 28 Sep 2025 22:01:18 +0100 Subject: HID: hid-debug: Fix spelling mistake "Rechargable" -> "Rechargeable" 
There is a spelling mistake in HID description. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Jiri Kosina --- drivers/hid/hid-debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 7107071c7c51..337d2dc81b4c 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -2523,7 +2523,7 @@ static const struct hid_usage_entry hid_usage_table[] = { { 0x85, 0x0088, "iDeviceName" }, { 0x85, 0x0089, "iDeviceChemistry" }, { 0x85, 0x008a, "ManufacturerData" }, - { 0x85, 0x008b, "Rechargable" }, + { 0x85, 0x008b, "Rechargeable" }, { 0x85, 0x008c, "WarningCapacityLimit" }, { 0x85, 0x008d, "CapacityGranularity1" }, { 0x85, 0x008e, "CapacityGranularity2" }, -- cgit v1.2.3 From 2cfcea7a745794f9b8e265a309717ca6ba335fc4 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 10 Oct 2025 12:41:48 +0200 Subject: drm/xe/svm: Ensure data will be migrated to system if indicated by madvise. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the location madvise() is set to DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM, the drm_pagemap in the SVM gpu fault handler will be set to NULL. However there is nothing that explicitly migrates the data to system if it is already present in device memory. In that case, set the device memory owner to NULL to ensure data gets properly migrated to system on page-fault. v2: - Remove redundant dpagemap assignment (Himal Prasad Ghimiray) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost #v1 Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20251010104149.72783-2-thomas.hellstrom@linux.intel.com Fixes: 10aa5c806030 ("drm/gpusvm, drm/xe: Fix userptr to not allow device private pages") --- drivers/gpu/drm/xe/xe_svm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index b268ee0d2271..da2a412f80c0 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -1034,6 +1034,9 @@ retry: if (err) return err; + dpagemap = xe_vma_resolve_pagemap(vma, tile); + if (!dpagemap && !ctx.devmem_only) + ctx.device_private_page_owner = NULL; range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx); if (IS_ERR(range)) @@ -1054,7 +1057,6 @@ retry: range_debug(range, "PAGE FAULT"); - dpagemap = xe_vma_resolve_pagemap(vma, tile); if (--migrate_try_count >= 0 && xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) { ktime_t migrate_start = xe_svm_stats_ktime_get(); -- cgit v1.2.3 From ee6e44dfe6e50b4a5df853d933a96bdff5309e6e Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Fri, 10 Oct 2025 00:17:27 +0530 Subject: sched/deadline: Stop dl_server before CPU goes offline IBM CI tool reported kernel warning[1] when running a CPU removal operation through drmgr[2]. i.e "drmgr -c cpu -r -q 1" WARNING: CPU: 0 PID: 0 at kernel/sched/cpudeadline.c:219 cpudl_set+0x58/0x170 NIP [c0000000002b6ed8] cpudl_set+0x58/0x170 LR [c0000000002b7cb8] dl_server_timer+0x168/0x2a0 Call Trace: [c000000002c2f8c0] init_stack+0x78c0/0x8000 (unreliable) [c0000000002b7cb8] dl_server_timer+0x168/0x2a0 [c00000000034df84] __hrtimer_run_queues+0x1a4/0x390 [c00000000034f624] hrtimer_interrupt+0x124/0x300 [c00000000002a230] timer_interrupt+0x140/0x320 Git bisects to: commit 4ae8d9aa9f9d ("sched/deadline: Fix dl_server getting stuck") This happens since: - dl_server hrtimer gets enqueued close to cpu offline, when kthread_park enqueues a fair task. 
- CPU goes offline and drmgr removes it from cpu_present_mask. - hrtimer fires and warning is hit. Fix it by stopping the dl_server before the CPU is marked dead. [1]: https://lore.kernel.org/all/8218e149-7718-4432-9312-f97297c352b9@linux.ibm.com/ [2]: https://github.com/ibm-power-utilities/powerpc-utils/tree/next/src/drmgr [sshegde: wrote the changelog and tested it] Fixes: 4ae8d9aa9f9d ("sched/deadline: Fix dl_server getting stuck") Closes: https://lore.kernel.org/all/8218e149-7718-4432-9312-f97297c352b9@linux.ibm.com Signed-off-by: Peter Zijlstra (Intel) Reported-by: Venkat Rao Bagalkote Signed-off-by: Shrikanth Hegde Signed-off-by: Peter Zijlstra (Intel) Tested-by: Marek Szyprowski Tested-by: Shrikanth Hegde --- kernel/sched/core.c | 2 ++ kernel/sched/deadline.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 198d2dd45f59..f1ebf67b48e2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8571,10 +8571,12 @@ int sched_cpu_dying(unsigned int cpu) sched_tick_stop(cpu); rq_lock_irqsave(rq, &rf); + update_rq_clock(rq); if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) { WARN(true, "Dying CPU not properly vacated!"); dump_rq_tasks(rq, KERN_WARNING); } + dl_server_stop(&rq->fair_server); rq_unlock_irqrestore(rq, &rf); calc_load_migrate(rq); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 615411a0a881..7b7671060bf9 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1582,6 +1582,9 @@ void dl_server_start(struct sched_dl_entity *dl_se) if (!dl_server(dl_se) || dl_se->dl_server_active) return; + if (WARN_ON_ONCE(!cpu_online(cpu_of(rq)))) + return; + dl_se->dl_server_active = 1; enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP); if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl)) -- cgit v1.2.3 From 17e3e88ed0b6318fde0d1c14df1a804711cab1b5 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 8 Oct 2025 15:12:14 +0200 Subject: sched/fair: Fix pelt lost idle time detection The check for some lost idle pelt time should always be done when pick_next_task_fair() fails to pick a task, and not only when we call it from the fair fast-path. The case happens when the last running task on the rq is an RT or DL task. When the latter goes to sleep and the /Sum of util_sum of the rq is at the max value, we don't account the lost idle time whereas we should. Fixes: 67692435c411 ("sched: Rework pick_next_task() slow-path") Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) --- kernel/sched/fair.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bc0b7ce8a65d..cee1793e8277 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8920,21 +8920,21 @@ simple: return p; idle: - if (!rf) - return NULL; - - new_tasks = sched_balance_newidle(rq, rf); + if (rf) { + new_tasks = sched_balance_newidle(rq, rf); - /* - * Because sched_balance_newidle() releases (and re-acquires) rq->lock, it is - * possible for any higher priority task to appear. In that case we - * must re-start the pick_next_entity() loop. - */ - if (new_tasks < 0) - return RETRY_TASK; + /* + * Because sched_balance_newidle() releases (and re-acquires) + * rq->lock, it is possible for any higher priority task to + * appear. In that case we must re-start the pick_next_entity() + * loop.
+ */ + if (new_tasks < 0) + return RETRY_TASK; - if (new_tasks > 0) - goto again; + if (new_tasks > 0) + goto again; + } /* * rq is about to be idle, check if we need to update the -- cgit v1.2.3 From ae11e08c3d0c78d08dac4cea30bf39ede2130b03 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 4 Jul 2025 10:54:15 +0300 Subject: i2c: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy(). Signed-off-by: Sakari Ailus Acked-by: Andi Shyti Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-amd-mp2.h | 1 - drivers/i2c/busses/i2c-at91-core.c | 1 - drivers/i2c/busses/i2c-at91-master.c | 1 - drivers/i2c/busses/i2c-cadence.c | 1 - drivers/i2c/busses/i2c-davinci.c | 2 -- drivers/i2c/busses/i2c-designware-master.c | 1 - drivers/i2c/busses/i2c-hix5hd2.c | 1 - drivers/i2c/busses/i2c-i801.c | 1 - drivers/i2c/busses/i2c-img-scb.c | 3 --- drivers/i2c/busses/i2c-imx-lpi2c.c | 4 ---- drivers/i2c/busses/i2c-imx.c | 3 --- drivers/i2c/busses/i2c-mv64xxx.c | 1 - drivers/i2c/busses/i2c-nvidia-gpu.c | 1 - drivers/i2c/busses/i2c-omap.c | 3 --- drivers/i2c/busses/i2c-qcom-cci.c | 2 -- drivers/i2c/busses/i2c-qcom-geni.c | 1 - drivers/i2c/busses/i2c-qup.c | 3 --- drivers/i2c/busses/i2c-riic.c | 2 -- drivers/i2c/busses/i2c-rzv2m.c | 1 - drivers/i2c/busses/i2c-sprd.c | 2 -- drivers/i2c/busses/i2c-stm32f7.c | 5 ----- drivers/i2c/busses/i2c-xiic.c | 1 - 22 files changed, 41 deletions(-) diff --git a/drivers/i2c/busses/i2c-amd-mp2.h b/drivers/i2c/busses/i2c-amd-mp2.h index 018a42de8b1e..9b7e9494dd12 100644 --- a/drivers/i2c/busses/i2c-amd-mp2.h +++ b/drivers/i2c/busses/i2c-amd-mp2.h @@ -207,7 +207,6 @@ static inline void amd_mp2_pm_runtime_get(struct amd_mp2_dev *mp2_dev) static inline void amd_mp2_pm_runtime_put(struct amd_mp2_dev *mp2_dev) { - pm_runtime_mark_last_busy(&mp2_dev->pci_dev->dev); pm_runtime_put_autosuspend(&mp2_dev->pci_dev->dev); } diff --git a/drivers/i2c/busses/i2c-at91-core.c b/drivers/i2c/busses/i2c-at91-core.c index edc047e3e535..b64adef778d4 100644 --- a/drivers/i2c/busses/i2c-at91-core.c +++ b/drivers/i2c/busses/i2c-at91-core.c @@ -313,7 +313,6 @@ static int __maybe_unused at91_twi_resume_noirq(struct device *dev) return ret; } - pm_runtime_mark_last_busy(dev); pm_request_autosuspend(dev); at91_init_twi_bus(twi_dev); diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c index 59795c1c24ff..894cedbca99f 100644 --- a/drivers/i2c/busses/i2c-at91-master.c +++ b/drivers/i2c/busses/i2c-at91-master.c @@ -717,7 +717,6 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) ret = (ret < 0) ?
ret : num; out: - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return ret; diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 697d095afbe4..0fb728ade92e 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -1128,7 +1128,6 @@ out: cdns_i2c_set_mode(CDNS_I2C_MODE_SLAVE, id); #endif - pm_runtime_mark_last_busy(id->dev); pm_runtime_put_autosuspend(id->dev); return ret; } diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index 6a3d4e9e07f4..a773ba082321 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -543,7 +543,6 @@ i2c_davinci_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) ret = num; out: - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return ret; @@ -821,7 +820,6 @@ static int davinci_i2c_probe(struct platform_device *pdev) if (r) goto err_unuse_clocks; - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return 0; diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index c7a72c28786c..41e9b5ecad20 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -901,7 +901,6 @@ done: i2c_dw_release_lock(dev); done_nolock: - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return ret; diff --git a/drivers/i2c/busses/i2c-hix5hd2.c b/drivers/i2c/busses/i2c-hix5hd2.c index 5358f5ddf924..95ab910b80c0 100644 --- a/drivers/i2c/busses/i2c-hix5hd2.c +++ b/drivers/i2c/busses/i2c-hix5hd2.c @@ -373,7 +373,6 @@ static int hix5hd2_i2c_xfer(struct i2c_adapter *adap, ret = num; out: - pm_runtime_mark_last_busy(priv->dev); pm_runtime_put_autosuspend(priv->dev); return ret; } diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index cba992fa6557..57fbec1259be 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -930,7 +930,6 @@ out: */ iowrite8(SMBHSTSTS_INUSE_STS | STATUS_FLAGS, SMBHSTSTS(priv)); - pm_runtime_mark_last_busy(&priv->pci_dev->dev); pm_runtime_put_autosuspend(&priv->pci_dev->dev); return ret; } diff --git a/drivers/i2c/busses/i2c-img-scb.c b/drivers/i2c/busses/i2c-img-scb.c index a454f9f25146..88192c25c44c 100644 --- a/drivers/i2c/busses/i2c-img-scb.c +++ b/drivers/i2c/busses/i2c-img-scb.c @@ -1131,7 +1131,6 @@ static int img_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, break; } - pm_runtime_mark_last_busy(adap->dev.parent); pm_runtime_put_autosuspend(adap->dev.parent); return i2c->msg_status ? 
i2c->msg_status : num; @@ -1165,7 +1164,6 @@ static int img_i2c_init(struct img_i2c *i2c) "Unknown hardware revision (%d.%d.%d.%d)\n", (rev >> 24) & 0xff, (rev >> 16) & 0xff, (rev >> 8) & 0xff, rev & 0xff); - pm_runtime_mark_last_busy(i2c->adap.dev.parent); pm_runtime_put_autosuspend(i2c->adap.dev.parent); return -EINVAL; } @@ -1317,7 +1315,6 @@ static int img_i2c_init(struct img_i2c *i2c) /* Perform a synchronous sequence to reset the bus */ ret = img_i2c_reset_bus(i2c); - pm_runtime_mark_last_busy(i2c->adap.dev.parent); pm_runtime_put_autosuspend(i2c->adap.dev.parent); return ret; diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 03b5a7e8c361..2a0962a0b441 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -363,7 +363,6 @@ static int lpi2c_imx_master_enable(struct lpi2c_imx_struct *lpi2c_imx) return 0; rpm_put: - pm_runtime_mark_last_busy(lpi2c_imx->adapter.dev.parent); pm_runtime_put_autosuspend(lpi2c_imx->adapter.dev.parent); return ret; @@ -377,7 +376,6 @@ static int lpi2c_imx_master_disable(struct lpi2c_imx_struct *lpi2c_imx) temp &= ~MCR_MEN; writel(temp, lpi2c_imx->base + LPI2C_MCR); - pm_runtime_mark_last_busy(lpi2c_imx->adapter.dev.parent); pm_runtime_put_autosuspend(lpi2c_imx->adapter.dev.parent); return 0; @@ -1462,7 +1460,6 @@ static int lpi2c_imx_probe(struct platform_device *pdev) if (ret) goto rpm_disable; - pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); dev_info(&lpi2c_imx->adapter.dev, "LPI2C adapter registered\n"); @@ -1564,7 +1561,6 @@ static int lpi2c_suspend(struct device *dev) static int lpi2c_resume(struct device *dev) { - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 60f5c790ad7c..dcce882f3eba 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1637,7 +1637,6 @@ static int i2c_imx_xfer(struct i2c_adapter *adapter, result = i2c_imx_xfer_common(adapter, msgs, num, false); - pm_runtime_mark_last_busy(i2c_imx->adapter.dev.parent); pm_runtime_put_autosuspend(i2c_imx->adapter.dev.parent); return result; @@ -1822,7 +1821,6 @@ static int i2c_imx_probe(struct platform_device *pdev) if (ret < 0) goto clk_notifier_unregister; - pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); dev_dbg(&i2c_imx->adapter.dev, "claimed irq %d\n", irq); @@ -1928,7 +1926,6 @@ static int i2c_imx_suspend(struct device *dev) static int i2c_imx_resume(struct device *dev) { - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 8fc26a511320..1acba628e16c 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -766,7 +766,6 @@ mv64xxx_i2c_xfer_core(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) drv_data->num_msgs = 0; drv_data->msgs = NULL; - pm_runtime_mark_last_busy(&adap->dev); pm_runtime_put_autosuspend(&adap->dev); return ret; diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c index 541d808d62d0..14c059b03945 100644 --- a/drivers/i2c/busses/i2c-nvidia-gpu.c +++ b/drivers/i2c/busses/i2c-nvidia-gpu.c @@ -216,7 +216,6 @@ exit: if (status2 < 0) dev_err(i2cd->dev, "i2c stop failed %d\n", status2); } - pm_runtime_mark_last_busy(i2cd->dev); pm_runtime_put_autosuspend(i2cd->dev); return status; } diff --git a/drivers/i2c/busses/i2c-omap.c 
b/drivers/i2c/busses/i2c-omap.c index 5fcc9f6c33e5..d9f590f0c384 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -828,7 +828,6 @@ omap_i2c_xfer_common(struct i2c_adapter *adap, struct i2c_msg msgs[], int num, omap->set_mpu_wkup_lat(omap->dev, -1); out: - pm_runtime_mark_last_busy(omap->dev); pm_runtime_put_autosuspend(omap->dev); return r; } @@ -1510,7 +1509,6 @@ omap_i2c_probe(struct platform_device *pdev) dev_info(omap->dev, "bus %d rev%d.%d at %d kHz\n", adap->nr, major, minor, omap->speed); - pm_runtime_mark_last_busy(omap->dev); pm_runtime_put_autosuspend(omap->dev); return 0; @@ -1605,7 +1603,6 @@ static int omap_i2c_suspend(struct device *dev) static int omap_i2c_resume(struct device *dev) { - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c index a3afa11a71a1..e631d79baf14 100644 --- a/drivers/i2c/busses/i2c-qcom-cci.c +++ b/drivers/i2c/busses/i2c-qcom-cci.c @@ -450,7 +450,6 @@ static int cci_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) ret = num; err: - pm_runtime_mark_last_busy(cci->dev); pm_runtime_put_autosuspend(cci->dev); return ret; @@ -508,7 +507,6 @@ static int __maybe_unused cci_suspend(struct device *dev) static int __maybe_unused cci_resume(struct device *dev) { cci_resume_runtime(dev); - pm_runtime_mark_last_busy(dev); pm_request_autosuspend(dev); return 0; diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 95a577764d5c..43fdd89b8beb 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -714,7 +714,6 @@ static int geni_i2c_xfer(struct i2c_adapter *adap, else ret = geni_i2c_fifo_xfer(gi2c, msgs, num); - pm_runtime_mark_last_busy(gi2c->se.dev); pm_runtime_put_autosuspend(gi2c->se.dev); gi2c->cur = NULL; gi2c->err = 0; diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index fc348924d522..a0e076fc5f36 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -1139,7 +1139,6 @@ static int qup_i2c_xfer(struct i2c_adapter *adap, ret = num; out: - pm_runtime_mark_last_busy(qup->dev); pm_runtime_put_autosuspend(qup->dev); return ret; @@ -1624,7 +1623,6 @@ static int qup_i2c_xfer_v2(struct i2c_adapter *adap, if (ret == 0) ret = num; out: - pm_runtime_mark_last_busy(qup->dev); pm_runtime_put_autosuspend(qup->dev); return ret; @@ -1991,7 +1989,6 @@ static int qup_i2c_suspend(struct device *device) static int qup_i2c_resume(struct device *device) { qup_i2c_pm_resume_runtime(device); - pm_runtime_mark_last_busy(device); pm_request_autosuspend(device); return 0; } diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c index b0ee9ac45a97..3e8f126cb7f7 100644 --- a/drivers/i2c/busses/i2c-riic.c +++ b/drivers/i2c/busses/i2c-riic.c @@ -206,7 +206,6 @@ static int riic_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) } out: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return riic->err ?: num; @@ -452,7 +451,6 @@ static int riic_init_hw(struct riic_dev *riic) riic_clear_set_bit(riic, ICCR1_IICRST, 0, RIIC_ICCR1); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; } diff --git a/drivers/i2c/busses/i2c-rzv2m.c b/drivers/i2c/busses/i2c-rzv2m.c index b0e9c0b62429..238714850673 100644 --- a/drivers/i2c/busses/i2c-rzv2m.c +++ b/drivers/i2c/busses/i2c-rzv2m.c @@ -372,7 +372,6 @@ static int rzv2m_i2c_xfer(struct i2c_adapter *adap, ret = 
num; out: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c index 26ec34b19ad5..1b490525d8dd 100644 --- a/drivers/i2c/busses/i2c-sprd.c +++ b/drivers/i2c/busses/i2c-sprd.c @@ -302,7 +302,6 @@ static int sprd_i2c_xfer(struct i2c_adapter *i2c_adap, ret = sprd_i2c_handle_msg(i2c_adap, &msgs[im++], 1); err_msg: - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return ret < 0 ? ret : im; @@ -559,7 +558,6 @@ static int sprd_i2c_probe(struct platform_device *pdev) goto err_rpm_put; } - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return 0; diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index e6815f6cae78..dc69ed934ec8 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -1761,7 +1761,6 @@ static int stm32f7_i2c_xfer_core(struct i2c_adapter *i2c_adap, } pm_free: - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return (ret < 0) ? ret : num; @@ -1870,7 +1869,6 @@ static int stm32f7_i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, } pm_free: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; } @@ -1977,7 +1975,6 @@ pm_free: if (!stm32f7_i2c_is_slave_registered(i2c_dev)) stm32f7_i2c_enable_wakeup(i2c_dev, false); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; @@ -2015,7 +2012,6 @@ static int stm32f7_i2c_unreg_slave(struct i2c_client *slave) stm32f7_i2c_enable_wakeup(i2c_dev, false); } - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return 0; @@ -2328,7 +2324,6 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) dev_info(i2c_dev->dev, "STM32F7 I2C-%d bus adapter\n", adap->nr); - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return 0; diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 607026c921d6..28015d77599d 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -1349,7 +1349,6 @@ static int xiic_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) mutex_unlock(&i2c->lock); out: - pm_runtime_mark_last_busy(i2c->dev); pm_runtime_put_autosuspend(i2c->dev); return err; } -- cgit v1.2.3 From 72f437e674e54f1c143dccc67e5556d8d5acb241 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Oct 2025 23:23:10 +0200 Subject: i2c: usbio: Add ACPI device-id for MTL-CVF devices Add "INTC10D2" ACPI device-id for MTL-CVF devices, like the Dell Latitude 7450. 
Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2368506 Signed-off-by: Hans de Goede Acked-by: Sakari Ailus Acked-by: Israel Cepeda Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-usbio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-usbio.c b/drivers/i2c/busses/i2c-usbio.c index d42f9ab6e9a5..e7799abf6787 100644 --- a/drivers/i2c/busses/i2c-usbio.c +++ b/drivers/i2c/busses/i2c-usbio.c @@ -27,6 +27,7 @@ static const struct acpi_device_id usbio_i2c_acpi_hids[] = { { "INTC1008" }, /* MTL */ { "INTC10B3" }, /* ARL */ { "INTC10B6" }, /* LNL */ + { "INTC10D2" }, /* MTL-CVF */ { "INTC10E3" }, /* PTL */ { } }; -- cgit v1.2.3 From 867537094124b0736ca2a40193de94fc5dc0b8d3 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Mon, 13 Oct 2025 16:31:18 -0500 Subject: dt-bindings: i2c: Convert apm,xgene-slimpro-i2c to DT schema Convert APM X-Gene slimpro-i2c binding to DT schema format. It's a straightforward conversion. Signed-off-by: Rob Herring (Arm) Signed-off-by: Wolfram Sang --- .../bindings/i2c/apm,xgene-slimpro-i2c.yaml | 36 ++++++++++++++++++++++ .../devicetree/bindings/i2c/i2c-xgene-slimpro.txt | 15 --------- 2 files changed, 36 insertions(+), 15 deletions(-) create mode 100644 Documentation/devicetree/bindings/i2c/apm,xgene-slimpro-i2c.yaml delete mode 100644 Documentation/devicetree/bindings/i2c/i2c-xgene-slimpro.txt diff --git a/Documentation/devicetree/bindings/i2c/apm,xgene-slimpro-i2c.yaml b/Documentation/devicetree/bindings/i2c/apm,xgene-slimpro-i2c.yaml new file mode 100644 index 000000000000..9460c64071f2 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/apm,xgene-slimpro-i2c.yaml @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/i2c/apm,xgene-slimpro-i2c.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: APM X-Gene SLIMpro Mailbox I2C + +maintainers: + - Khuong Dinh + +description: + An I2C controller accessed over the "SLIMpro" mailbox. + +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + +properties: + compatible: + const: apm,xgene-slimpro-i2c + + mboxes: + maxItems: 1 + +required: + - compatible + - mboxes + +unevaluatedProperties: false + +examples: + - | + i2c { + compatible = "apm,xgene-slimpro-i2c"; + mboxes = <&mailbox 0>; + }; diff --git a/Documentation/devicetree/bindings/i2c/i2c-xgene-slimpro.txt b/Documentation/devicetree/bindings/i2c/i2c-xgene-slimpro.txt deleted file mode 100644 index f6b2c20cfbf6..000000000000 --- a/Documentation/devicetree/bindings/i2c/i2c-xgene-slimpro.txt +++ /dev/null @@ -1,15 +0,0 @@ -APM X-Gene SLIMpro Mailbox I2C Driver - -An I2C controller accessed over the "SLIMpro" mailbox. - -Required properties : - - - compatible : should be "apm,xgene-slimpro-i2c" - - mboxes : use the label reference for the mailbox as the first parameter. - The second parameter is the channel number. - -Example : - i2cslimpro { - compatible = "apm,xgene-slimpro-i2c"; - mboxes = <&mailbox 0>; - }; -- cgit v1.2.3 From 4f86eb0a38bc719ba966f155071a6f0594327f34 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Mon, 13 Oct 2025 16:00:39 +0800 Subject: selftests: net: check jq command is supported The jq command is used in vlan_bridge_binding.sh; if it is not installed, the test will spam the following log.
# ./vlan_bridge_binding.sh: line 51: jq: command not found # ./vlan_bridge_binding.sh: line 51: jq: command not found # ./vlan_bridge_binding.sh: line 51: jq: command not found # ./vlan_bridge_binding.sh: line 51: jq: command not found # ./vlan_bridge_binding.sh: line 51: jq: command not found # TEST: Test bridge_binding on->off when lower down [FAIL] # Got operstate of , expected 0 rtnetlink.sh has the same problem. It makes sense to check if jq is installed before running these tests. After this patch, vlan_bridge_binding.sh is skipped if jq is not installed: # timeout set to 3600 # selftests: net: vlan_bridge_binding.sh # TEST: jq not installed [SKIP] Fixes: dca12e9ab760 ("selftests: net: Add a VLAN bridge binding selftest") Fixes: 6a414fd77f61 ("selftests: rtnetlink: Add an address proto test") Signed-off-by: Wang Liang Reviewed-by: Hangbin Liu Link: https://patch.msgid.link/20251013080039.3035898-1-wangliang74@huawei.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/rtnetlink.sh | 2 ++ tools/testing/selftests/net/vlan_bridge_binding.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index dbf77513f617..163a084d525d 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -1466,6 +1466,8 @@ usage: ${0##*/} OPTS EOF } +require_command jq + #check for needed privileges if [ "$(id -u)" -ne 0 ];then end_test "SKIP: Need root privileges" diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh index db481af9b6b3..e8c02c64e03a 100755 --- a/tools/testing/selftests/net/vlan_bridge_binding.sh +++ b/tools/testing/selftests/net/vlan_bridge_binding.sh @@ -249,6 +249,8 @@ test_binding_toggle_off_when_upper_down() do_test_binding_off : "on->off when upper down" } +require_command jq + trap defer_scopes_cleanup EXIT setup_prepare tests_run -- cgit v1.2.3 From d41f68dff783d181a8fd462e612bda0fbab7f735 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 13 Oct 2025 20:05:52 -0700 Subject: ALSA: firewire: amdtp-stream: fix enum kernel-doc warnings Fix spelling of CIP_NO_HEADER to prevent a kernel-doc warning. Warning: amdtp-stream.h:57 Enum value 'CIP_NO_HEADER' not described in enum 'cip_flags' Warning: amdtp-stream.h:57 Excess enum value '%CIP_NO_HEADERS' description in 'cip_flags' Fixes: 3b196c394dd9f ("ALSA: firewire-lib: add no-header packet processing") Signed-off-by: Randy Dunlap Reviewed-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/amdtp-stream.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index 775db3fc4959..ec10270c2cce 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -32,7 +32,7 @@ * allows 5 times as large as IEC 61883-6 defines. * @CIP_HEADER_WITHOUT_EOH: Only for in-stream. CIP Header doesn't include * valid EOH. - * @CIP_NO_HEADERS: a lack of headers in packets + * @CIP_NO_HEADER: a lack of headers in packets * @CIP_UNALIGHED_DBC: Only for in-stream. The value of dbc is not alighed to * the value of current SYT_INTERVAL; e.g. initial value is not zero. * @CIP_UNAWARE_SYT: For outgoing packet, the value in SYT field of CIP is 0xffff.
-- cgit v1.2.3 From fd6e385528d8f85993b7bfc6430576136bb14c65 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Tue, 7 Oct 2025 13:57:50 +0200 Subject: accel/qaic: Fix bootlog initialization ordering As soon as we queue MHI buffers to receive the bootlog from the device, we could be receiving data. Therefore all the resources needed to process that data need to be set up prior to queuing the buffers. We currently initialize some of the resources after queuing the buffers which creates a race between the probe() and any data that comes back from the device. If the uninitialized resources are accessed, we could see page faults. Fix the init ordering to close the race. Fixes: 5f8df5c6def6 ("accel/qaic: Add bootlog debugfs") Signed-off-by: Jeffrey Hugo Signed-off-by: Youssef Samir Reviewed-by: Jeff Hugo Reviewed-by: Carl Vanderlip Signed-off-by: Jeff Hugo Link: https://lore.kernel.org/r/20251007115750.332169-1-youssef.abdulrahman@oss.qualcomm.com --- drivers/accel/qaic/qaic_debugfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c index a991b8198dc4..8dc4fe5bb560 100644 --- a/drivers/accel/qaic/qaic_debugfs.c +++ b/drivers/accel/qaic/qaic_debugfs.c @@ -218,6 +218,9 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d if (ret) goto destroy_workqueue; + dev_set_drvdata(&mhi_dev->dev, qdev); + qdev->bootlog_ch = mhi_dev; + for (i = 0; i < BOOTLOG_POOL_SIZE; i++) { msg = devm_kzalloc(&qdev->pdev->dev, sizeof(*msg), GFP_KERNEL); if (!msg) { @@ -233,8 +236,6 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d goto mhi_unprepare; } - dev_set_drvdata(&mhi_dev->dev, qdev); - qdev->bootlog_ch = mhi_dev; return 0; mhi_unprepare: -- cgit v1.2.3 From 89e347f8a70165d1e8d88a93d875da7742c902ce Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 08:30:15 -0700 Subject: drm/xe/kunit: Fix kerneldoc for parameterized tests KUnit's generate_params() was recently updated to take an additional test context parameter. Xe's IP and platform parameter generators were updated accordingly at the same time, but the new parameter was not added to the functions' kerneldoc, resulting in the following warnings: Warning: drivers/gpu/drm/xe/tests/xe_pci.c:78 function parameter 'test' not described in 'xe_pci_fake_data_gen_params' Warning: drivers/gpu/drm/xe/tests/xe_pci.c:254 function parameter 'test' not described in 'xe_pci_graphics_ip_gen_param' Warning: drivers/gpu/drm/xe/tests/xe_pci.c:278 function parameter 'test' not described in 'xe_pci_media_ip_gen_param' Warning: drivers/gpu/drm/xe/tests/xe_pci.c:302 function parameter 'test' not described in 'xe_pci_id_gen_param' Warning: drivers/gpu/drm/xe/tests/xe_pci.c:390 function parameter 'test' not described in 'xe_pci_live_device_gen_param' 5 warnings as errors Document the new parameter to eliminate the warnings and make CI happy.
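For illustration, a minimal sketch of the kerneldoc shape these generators now follow; the function name example_gen_param and its body are hypothetical, while the @test/@prev/@desc wording matches the diff below:

/**
 * example_gen_param - Generate example parameters (hypothetical generator)
 * @test: test context object
 * @prev: the pointer to the previous parameter to iterate from or NULL
 * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
 */
static const void *example_gen_param(struct kunit *test, const void *prev,
				     char *desc)
{
	/* iterate a parameter table here; returning NULL ends the iteration */
	return NULL;
}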
Fixes: b9a214b5f6aa ("kunit: Pass parameterized test context to generate_params()") Reviewed-by: Shuicheng Lin Link: https://lore.kernel.org/r/20251013153014.2362879-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/tests/xe_pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 69e2840c7ef0..663a79ec960d 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -66,6 +66,7 @@ KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc); /** * xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -242,6 +243,7 @@ KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc); /** * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -266,6 +268,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param); /** * xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -290,6 +293,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); /** * xe_pci_id_gen_param - Generate struct pci_device_id parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -376,6 +380,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init); /** * xe_pci_live_device_gen_param - Helper to iterate Xe devices as KUnit parameters + * @test: test context object * @prev: the previously returned value, or NULL for the first iteration * @desc: the buffer for a parameter name * -- cgit v1.2.3 From 11f08c30a3e4157305ba692f1d44cca5fc9a8fca Mon Sep 17 00:00:00 2001 From: Youssef Samir Date: Tue, 7 Oct 2025 14:23:20 +0200 Subject: accel/qaic: Treat remaining == 0 as error in find_and_map_user_pages() Currently, if find_and_map_user_pages() takes a DMA xfer request from the user with a length field set to 0, or in a rare case, the host receives QAIC_TRANS_DMA_XFER_CONT from the device where resources->xferred_dma_size is equal to the requested transaction size, the function will return 0 before allocating an sgt or setting the fields of the dma_xfer struct. In that case, encode_addr_size_pairs() will try to access the sgt which will lead to a general protection fault. Return an EINVAL in case the user provides a zero-sized ALP, or the device requests continuation after all of the bytes have been transferred. 
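The resulting logic, excerpted from the one-line diff below with its surrounding checks for context:

	remaining = in_trans->size - resources->xferred_dma_size;
	if (remaining == 0)
		return -EINVAL;

	if (check_add_overflow(xfer_start_addr, remaining, &end))
		return -EINVAL;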
Fixes: 96d3c1cadedb ("accel/qaic: Clean up integer overflow checking in map_user_pages()") Signed-off-by: Youssef Samir Signed-off-by: Youssef Samir Reviewed-by: Jeff Hugo Reviewed-by: Carl Vanderlip Signed-off-by: Jeff Hugo Link: https://lore.kernel.org/r/20251007122320.339654-1-youssef.abdulrahman@oss.qualcomm.com --- drivers/accel/qaic/qaic_control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c index d8bdab69f800..b86a8e48e731 100644 --- a/drivers/accel/qaic/qaic_control.c +++ b/drivers/accel/qaic/qaic_control.c @@ -407,7 +407,7 @@ static int find_and_map_user_pages(struct qaic_device *qdev, return -EINVAL; remaining = in_trans->size - resources->xferred_dma_size; if (remaining == 0) - return 0; + return -EINVAL; if (check_add_overflow(xfer_start_addr, remaining, &end)) return -EINVAL; -- cgit v1.2.3 From 2816905e1403ccc2ff9db12b5bbc89d11187c942 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:44 -0700 Subject: drm/xe/huc: Adjust HuC check on primary GT The HuC initialization code determines whether a platform can have a HuC on the primary GT by checking whether tile->media_gt is NULL; old Xe1 platforms that combined render+media into a single GT will always have a NULL media_gt pointer. However once we allow media to be disabled via configfs, there will also be cases where tile->media_gt is NULL on more modern platforms, causing this condition to behave incorrectly. To handle cases where media gets disabled via configfs (or theoretical cases where media is truly fused off in hardware), change the condition to consider the graphics version of the primary GT; only the old Xe1 platforms with graphics versions 12.55 or earlier should try to initialize a HuC on the primary GT. Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Reviewed-by: Tejas Upadhyay Link: https://lore.kernel.org/r/20251013200944.2499947-26-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_huc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 7e43b2dd6a32..0a70c8924582 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -66,14 +66,18 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) int xe_huc_init(struct xe_huc *huc) { struct xe_gt *gt = huc_to_gt(huc); - struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); int ret; huc->fw.type = XE_UC_FW_TYPE_HUC; - /* On platforms with a media GT the HuC is only available there */ - if (tile->media_gt && (gt != tile->media_gt)) { + /* + * The HuC is only available on the media GT on most platforms. The + * exception to that rule are the old Xe1 platforms where there was + * no separate GT for media IP, so the HuC was part of the primary + * GT. Such platforms have graphics versions 12.55 and earlier. + */ + if (!xe_gt_is_media_type(gt) && GRAPHICS_VERx100(xe) > 1255) { xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED); return 0; } -- cgit v1.2.3 From d41f306cc2e3cc44b1226122d97bf3d451c9c6e6 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:45 -0700 Subject: drm/xe: Drop GT parameter to xe_display_irq_postinstall() Display interrupt handling has no relation to GT(s) on the platforms supported by the Xe driver. 
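After this change the function reduces to a plain display call, as the diff below shows:

void xe_display_irq_postinstall(struct xe_device *xe)
{
	struct intel_display *display = xe->display;

	if (!xe->info.probe_display)
		return;

	gen11_de_irq_postinstall(display);
}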
We only call xe_display_irq_postinstall with the first tile's primary GT, so the single condition that uses the GT pointer within the function always evaluates to true. Drop the unnecessary parameter and the condition. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251013200944.2499947-27-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/display/xe_display.c | 5 ++--- drivers/gpu/drm/xe/display/xe_display.h | 4 ++-- drivers/gpu/drm/xe/xe_irq.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 19e691fccf8c..f5101f267c18 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -223,15 +223,14 @@ void xe_display_irq_reset(struct xe_device *xe) gen11_display_irq_reset(display); } -void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +void xe_display_irq_postinstall(struct xe_device *xe) { struct intel_display *display = xe->display; if (!xe->info.probe_display) return; - if (gt->info.id == XE_GT0) - gen11_de_irq_postinstall(display); + gen11_de_irq_postinstall(display); } static bool suspend_to_idle(void) diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index e533aa4750bc..76db95c25f7e 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -26,7 +26,7 @@ void xe_display_unregister(struct xe_device *xe); void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl); void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir); void xe_display_irq_reset(struct xe_device *xe); -void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt); +void xe_display_irq_postinstall(struct xe_device *xe); void xe_display_pm_suspend(struct xe_device *xe); void xe_display_pm_shutdown(struct xe_device *xe); @@ -55,7 +55,7 @@ static inline void xe_display_unregister(struct xe_device *xe) {} static inline void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) {} static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) {} static inline void xe_display_irq_reset(struct xe_device *xe) {} -static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {} +static inline void xe_display_irq_postinstall(struct xe_device *xe) {} static inline void xe_display_pm_suspend(struct xe_device *xe) {} static inline void xe_display_pm_shutdown(struct xe_device *xe) {} diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index af519414a429..9b938f1edaf5 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -657,7 +657,7 @@ static void xe_irq_postinstall(struct xe_device *xe) xe_memirq_postinstall(&tile->memirq); } - xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe)); + xe_display_irq_postinstall(xe); xe_i2c_irq_postinstall(xe); /* -- cgit v1.2.3 From a3bcaf11f410583826e6f7769cf07091bbd0888f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:46 -0700 Subject: drm/xe: Move 'va_bits' flag back to platform descriptor The number of virtual address bits is something that should be tracked at the platform level rather than the IP level. Even when mixing and matching various graphics, media, and display IP blocks, the platform as a whole has to have consistent page table handling. 
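As an abbreviated sketch of the move (unrelated fields elided; see the full diff below):

struct xe_device_desc {
	/* ... */
	u8 va_bits;	/* platform-level trait, set per device descriptor */
};

/* in xe_info_init_early(): */
xe->info.va_bits = desc->va_bits;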
This is also a trait that should be tied to the platform even if the graphics IP itself is not present (e.g., if we disable the primary GT via configfs). v2: - Drop the default value of 48 and explicitly set it in each relevant descriptor. (Lucas, Michal) v3: - Drop an outdated comment about default value. (Michal) Cc: Lucas De Marchi Cc: Michal Wajdeczko Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251013200944.2499947-28-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_pci.c | 21 +++++++++++++++------ drivers/gpu/drm/xe/xe_pci_types.h | 2 +- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index e706b2808553..e0e3249045a2 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -52,13 +52,11 @@ __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); static const struct xe_graphics_desc graphics_xelp = { .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0), - .va_bits = 48, .vm_max_level = 3, }; #define XE_HP_FEATURES \ .has_range_tlb_inval = true, \ - .va_bits = 48, \ .vm_max_level = 3 static const struct xe_graphics_desc graphics_xehpg = { @@ -84,7 +82,6 @@ static const struct xe_graphics_desc graphics_xehpc = { BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), XE_HP_FEATURES, - .va_bits = 57, .vm_max_level = 4, .vram_flags = XE_VRAM_FLAGS_NEED64K, @@ -108,7 +105,6 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_range_tlb_inval = 1, \ .has_usm = 1, \ .has_64bit_timestamp = 1, \ - .va_bits = 48, \ .vm_max_level = 4, \ .hw_engine_mask = \ BIT(XE_HW_ENGINE_RCS0) | \ @@ -174,6 +170,7 @@ static const struct xe_device_desc tgl_desc = { .has_sriov = true, .max_gt_per_tile = 1, .require_force_probe = true, + .va_bits = 48, }; static const struct xe_device_desc rkl_desc = { @@ -185,6 +182,7 @@ static const struct xe_device_desc rkl_desc = { .has_llc = true, .max_gt_per_tile = 1, .require_force_probe = true, + .va_bits = 48, }; static const u16 adls_rpls_ids[] = { INTEL_RPLS_IDS(NOP), 0 }; @@ -203,6 +201,7 @@ static const struct xe_device_desc adl_s_desc = { { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, {}, }, + .va_bits = 48, }; static const u16 adlp_rplu_ids[] = { INTEL_RPLU_IDS(NOP), 0 }; @@ -221,6 +220,7 @@ static const struct xe_device_desc adl_p_desc = { { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, {}, }, + .va_bits = 48, }; static const struct xe_device_desc adl_n_desc = { @@ -233,6 +233,7 @@ static const struct xe_device_desc adl_n_desc = { .has_sriov = true, .max_gt_per_tile = 1, .require_force_probe = true, + .va_bits = 48, }; #define DGFX_FEATURES \ @@ -249,6 +250,7 @@ static const struct xe_device_desc dg1_desc = { .has_heci_gscfi = 1, .max_gt_per_tile = 1, .require_force_probe = true, + .va_bits = 48, }; static const u16 dg2_g10_ids[] = { INTEL_DG2_G10_IDS(NOP), INTEL_ATS_M150_IDS(NOP), 0 }; @@ -265,7 +267,8 @@ static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 }; { XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \ { XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \ { } \ - } + }, \ + .va_bits = 48 static const struct xe_device_desc ats_m_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, @@ -303,6 +306,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { .max_gt_per_tile = 1, .max_remote_tiles = 1, .require_force_probe = true, + .va_bits = 57, .has_mbx_power_limits = false, }; @@ -314,6 +318,7 @@ static const struct xe_device_desc mtl_desc = { 
.has_display = true, .has_pxp = true, .max_gt_per_tile = 2, + .va_bits = 48, }; static const struct xe_device_desc lnl_desc = { @@ -323,6 +328,7 @@ static const struct xe_device_desc lnl_desc = { .has_pxp = true, .max_gt_per_tile = 2, .needs_scratch = true, + .va_bits = 48, }; static const struct xe_device_desc bmg_desc = { @@ -338,6 +344,7 @@ static const struct xe_device_desc bmg_desc = { .has_sriov = true, .max_gt_per_tile = 2, .needs_scratch = true, + .va_bits = 48, }; static const struct xe_device_desc ptl_desc = { @@ -348,6 +355,7 @@ static const struct xe_device_desc ptl_desc = { .max_gt_per_tile = 2, .needs_scratch = true, .needs_shared_vf_gt_wq = true, + .va_bits = 48, }; #undef PLATFORM @@ -583,6 +591,8 @@ static int xe_info_init_early(struct xe_device *xe, subplatform_desc->subplatform : XE_SUBPLATFORM_NONE; xe->info.dma_mask_size = desc->dma_mask_size; + xe->info.va_bits = desc->va_bits; + xe->info.is_dgfx = desc->is_dgfx; xe->info.has_fan_control = desc->has_fan_control; xe->info.has_mbx_power_limits = desc->has_mbx_power_limits; @@ -713,7 +723,6 @@ static int xe_info_init(struct xe_device *xe, } xe->info.vram_flags = graphics_desc->vram_flags; - xe->info.va_bits = graphics_desc->va_bits; xe->info.vm_max_level = graphics_desc->vm_max_level; xe->info.has_asid = graphics_desc->has_asid; xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index b11bf6abda5b..7c27e3742aa7 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -30,6 +30,7 @@ struct xe_device_desc { u8 dma_mask_size; u8 max_remote_tiles:2; u8 max_gt_per_tile:2; + u8 va_bits; u8 require_force_probe:1; u8 is_dgfx:1; @@ -52,7 +53,6 @@ struct xe_device_desc { }; struct xe_graphics_desc { - u8 va_bits; u8 vm_max_level; u8 vram_flags; -- cgit v1.2.3 From 50292f9af8ec06dd1679943cddc3b59844266b9e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:47 -0700 Subject: drm/xe: Move 'vm_max_level' flag back to platform descriptor The number of page table levels for PPGTT virtual addresses is something that should be tracked at the platform level rather than the IP level. Even when mixing and matching various graphics, media, and display IP blocks, the platform as a whole has to have consistent page table handling. This is also a trait that should be tied to the platform even if the graphics IP itself is not present (e.g., if we disable the primary GT via configfs). v2: - Drop default value of 4 and explicitly set the value in each platform descriptor. (Lucas) v3: - Drop outdated code comment and commit message paragraph about default value. (Gustavo) v4: - Add missing setting for tgl_desc. 
(Gustavo) Cc: Lucas De Marchi Cc: Gustavo Sousa Reviewed-by: Lucas De Marchi Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20251013200944.2499947-29-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_pci.c | 23 +++++++++++++++-------- drivers/gpu/drm/xe/xe_pci_types.h | 2 +- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index e0e3249045a2..ffb7afcda8de 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -51,13 +51,10 @@ __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); static const struct xe_graphics_desc graphics_xelp = { .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0), - - .vm_max_level = 3, }; #define XE_HP_FEATURES \ - .has_range_tlb_inval = true, \ - .vm_max_level = 3 + .has_range_tlb_inval = true static const struct xe_graphics_desc graphics_xehpg = { .hw_engine_mask = @@ -82,7 +79,6 @@ static const struct xe_graphics_desc graphics_xehpc = { BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), XE_HP_FEATURES, - .vm_max_level = 4, .vram_flags = XE_VRAM_FLAGS_NEED64K, .has_asid = 1, @@ -105,7 +101,6 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_range_tlb_inval = 1, \ .has_usm = 1, \ .has_64bit_timestamp = 1, \ - .vm_max_level = 4, \ .hw_engine_mask = \ BIT(XE_HW_ENGINE_RCS0) | \ BIT(XE_HW_ENGINE_BCS8) | BIT(XE_HW_ENGINE_BCS0) | \ @@ -171,6 +166,7 @@ static const struct xe_device_desc tgl_desc = { .max_gt_per_tile = 1, .require_force_probe = true, .va_bits = 48, + .vm_max_level = 3, }; static const struct xe_device_desc rkl_desc = { @@ -183,6 +179,7 @@ static const struct xe_device_desc rkl_desc = { .max_gt_per_tile = 1, .require_force_probe = true, .va_bits = 48, + .vm_max_level = 3, }; static const u16 adls_rpls_ids[] = { INTEL_RPLS_IDS(NOP), 0 }; @@ -202,6 +199,7 @@ static const struct xe_device_desc adl_s_desc = { {}, }, .va_bits = 48, + .vm_max_level = 3, }; static const u16 adlp_rplu_ids[] = { INTEL_RPLU_IDS(NOP), 0 }; @@ -221,6 +219,7 @@ static const struct xe_device_desc adl_p_desc = { {}, }, .va_bits = 48, + .vm_max_level = 3, }; static const struct xe_device_desc adl_n_desc = { @@ -234,6 +233,7 @@ static const struct xe_device_desc adl_n_desc = { .max_gt_per_tile = 1, .require_force_probe = true, .va_bits = 48, + .vm_max_level = 3, }; #define DGFX_FEATURES \ @@ -251,6 +251,7 @@ static const struct xe_device_desc dg1_desc = { .max_gt_per_tile = 1, .require_force_probe = true, .va_bits = 48, + .vm_max_level = 3, }; static const u16 dg2_g10_ids[] = { INTEL_DG2_G10_IDS(NOP), INTEL_ATS_M150_IDS(NOP), 0 }; @@ -268,7 +269,8 @@ static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 }; { XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \ { } \ }, \ - .va_bits = 48 + .va_bits = 48, \ + .vm_max_level = 3 static const struct xe_device_desc ats_m_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, @@ -307,6 +309,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { .max_remote_tiles = 1, .require_force_probe = true, .va_bits = 57, + .vm_max_level = 4, .has_mbx_power_limits = false, }; @@ -319,6 +322,7 @@ static const struct xe_device_desc mtl_desc = { .has_pxp = true, .max_gt_per_tile = 2, .va_bits = 48, + .vm_max_level = 4, }; static const struct xe_device_desc lnl_desc = { @@ -329,6 +333,7 @@ static const struct xe_device_desc lnl_desc = { .max_gt_per_tile = 2, .needs_scratch = true, .va_bits = 48, + .vm_max_level = 4, }; static const struct xe_device_desc bmg_desc = { @@ 
-345,6 +350,7 @@ static const struct xe_device_desc bmg_desc = { .max_gt_per_tile = 2, .needs_scratch = true, .va_bits = 48, + .vm_max_level = 4, }; static const struct xe_device_desc ptl_desc = { @@ -356,6 +362,7 @@ static const struct xe_device_desc ptl_desc = { .needs_scratch = true, .needs_shared_vf_gt_wq = true, .va_bits = 48, + .vm_max_level = 4, }; #undef PLATFORM @@ -592,6 +599,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.dma_mask_size = desc->dma_mask_size; xe->info.va_bits = desc->va_bits; + xe->info.vm_max_level = desc->vm_max_level; xe->info.is_dgfx = desc->is_dgfx; xe->info.has_fan_control = desc->has_fan_control; @@ -723,7 +731,6 @@ static int xe_info_init(struct xe_device *xe, } xe->info.vram_flags = graphics_desc->vram_flags; - xe->info.vm_max_level = graphics_desc->vm_max_level; xe->info.has_asid = graphics_desc->has_asid; xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit; if (xe->info.platform != XE_PVC) diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 7c27e3742aa7..e59b59ec636d 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -31,6 +31,7 @@ struct xe_device_desc { u8 max_remote_tiles:2; u8 max_gt_per_tile:2; u8 va_bits; + u8 vm_max_level; u8 require_force_probe:1; u8 is_dgfx:1; @@ -53,7 +54,6 @@ struct xe_device_desc { }; struct xe_graphics_desc { - u8 vm_max_level; u8 vram_flags; u64 hw_engine_mask; /* hardware engines provided by graphics IP */ -- cgit v1.2.3 From 76b7aedd66040bc2d70f28e49cdf7533c971b2c0 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:48 -0700 Subject: drm/xe: Move 'vram_flags' flag back to platform descriptor Restrictions and requirements on VRAM alignment are something that should be tracked at the platform level rather than the IP level. Even when mixing and matching various graphics, media, and display IP blocks, the platform as a whole has to have consistent memory allocation handling. This is also a trait that should be tied to the platform even if the graphics IP itself is not present (e.g., if we disable the primary GT via configfs). 
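As a rough illustration of the kind of consumer this flag has (this helper is a sketch, not actual xe code; only XE_VRAM_FLAGS_NEED64K and xe->info.vram_flags come from the driver):

static unsigned long vram_min_page_size(struct xe_device *xe)
{
	/* platforms flagged NEED64K must place VRAM buffers on 64K pages */
	if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
		return SZ_64K;

	return SZ_4K;
}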
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251013200944.2499947-30-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_pci.c | 8 ++++---- drivers/gpu/drm/xe/xe_pci_types.h | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index ffb7afcda8de..01bb70285dac 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -63,7 +63,6 @@ static const struct xe_graphics_desc graphics_xehpg = { BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), XE_HP_FEATURES, - .vram_flags = XE_VRAM_FLAGS_NEED64K, .has_flat_ccs = 1, }; @@ -79,7 +78,6 @@ static const struct xe_graphics_desc graphics_xehpc = { BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), XE_HP_FEATURES, - .vram_flags = XE_VRAM_FLAGS_NEED64K, .has_asid = 1, .has_atomic_enable_pte_bit = 1, @@ -270,7 +268,8 @@ static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 }; { } \ }, \ .va_bits = 48, \ - .vm_max_level = 3 + .vm_max_level = 3, \ + .vram_flags = XE_VRAM_FLAGS_NEED64K static const struct xe_device_desc ats_m_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, @@ -310,6 +309,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { .require_force_probe = true, .va_bits = 57, .vm_max_level = 4, + .vram_flags = XE_VRAM_FLAGS_NEED64K, .has_mbx_power_limits = false, }; @@ -600,6 +600,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.dma_mask_size = desc->dma_mask_size; xe->info.va_bits = desc->va_bits; xe->info.vm_max_level = desc->vm_max_level; + xe->info.vram_flags = desc->vram_flags; xe->info.is_dgfx = desc->is_dgfx; xe->info.has_fan_control = desc->has_fan_control; @@ -730,7 +731,6 @@ static int xe_info_init(struct xe_device *xe, media_desc = NULL; } - xe->info.vram_flags = graphics_desc->vram_flags; xe->info.has_asid = graphics_desc->has_asid; xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit; if (xe->info.platform != XE_PVC) diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index e59b59ec636d..3189bd95bb6e 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -32,6 +32,7 @@ struct xe_device_desc { u8 max_gt_per_tile:2; u8 va_bits; u8 vm_max_level; + u8 vram_flags; u8 require_force_probe:1; u8 is_dgfx:1; @@ -54,8 +55,6 @@ struct xe_device_desc { }; struct xe_graphics_desc { - u8 vram_flags; - u64 hw_engine_mask; /* hardware engines provided by graphics IP */ u8 has_asid:1; -- cgit v1.2.3 From 1a28651c0627add7928c47407d6a9b18a1a81c37 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:49 -0700 Subject: drm/xe: Move 'has_flatccs' flag back to platform descriptor FlatCCS presence/absence is a flag that should be tracked at the platform level rather than the IP level. FlatCCS affects the device-wide memory initialization and reservations so its effects are not confined to a single IP block or GT. This is also a trait that should be tied to the platform even if the graphics IP itself is not present (e.g., if we disable the primary GT via configfs). 
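Sketched flow after the move; the fuse-check condition is hypothetical, while the assignment and its comment come from the diff below:

/* in xe_info_init_early(): runtime fusing may force flat_ccs to disabled later */
xe->info.has_flat_ccs = desc->has_flat_ccs;

/* later, during runtime probing (illustrative only): */
if (flat_ccs_fused_off(xe))
	xe->info.has_flat_ccs = false;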
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251013200944.2499947-31-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_pci.c | 12 ++++++------ drivers/gpu/drm/xe/xe_pci_types.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 01bb70285dac..3769456a72ad 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -63,8 +63,6 @@ static const struct xe_graphics_desc graphics_xehpg = { BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), XE_HP_FEATURES, - - .has_flat_ccs = 1, }; static const struct xe_graphics_desc graphics_xehpc = { @@ -95,7 +93,6 @@ static const struct xe_graphics_desc graphics_xelpg = { #define XE2_GFX_FEATURES \ .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ - .has_flat_ccs = 1, \ .has_range_tlb_inval = 1, \ .has_usm = 1, \ .has_64bit_timestamp = 1, \ @@ -259,6 +256,7 @@ static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 }; #define DG2_FEATURES \ DGFX_FEATURES, \ PLATFORM(DG2), \ + .has_flat_ccs = 1, \ .has_gsc_nvm = 1, \ .has_heci_gscfi = 1, \ .subplatforms = (const struct xe_subplatform_desc[]) { \ @@ -329,6 +327,7 @@ static const struct xe_device_desc lnl_desc = { PLATFORM(LUNARLAKE), .dma_mask_size = 46, .has_display = true, + .has_flat_ccs = 1, .has_pxp = true, .max_gt_per_tile = 2, .needs_scratch = true, @@ -342,6 +341,7 @@ static const struct xe_device_desc bmg_desc = { .dma_mask_size = 46, .has_display = true, .has_fan_control = true, + .has_flat_ccs = 1, .has_mbx_power_limits = true, .has_gsc_nvm = 1, .has_heci_cscfi = 1, @@ -357,6 +357,7 @@ static const struct xe_device_desc ptl_desc = { PLATFORM(PANTHERLAKE), .dma_mask_size = 46, .has_display = true, + .has_flat_ccs = 1, .has_sriov = true, .max_gt_per_tile = 2, .needs_scratch = true, @@ -604,6 +605,8 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.is_dgfx = desc->is_dgfx; xe->info.has_fan_control = desc->has_fan_control; + /* runtime fusing may force flat_ccs to disabled later */ + xe->info.has_flat_ccs = desc->has_flat_ccs; xe->info.has_mbx_power_limits = desc->has_mbx_power_limits; xe->info.has_gsc_nvm = desc->has_gsc_nvm; xe->info.has_heci_gscfi = desc->has_heci_gscfi; @@ -736,9 +739,6 @@ static int xe_info_init(struct xe_device *xe, if (xe->info.platform != XE_PVC) xe->info.has_device_atomics_on_smem = 1; - /* Runtime detection may change this later */ - xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; - xe->info.has_range_tlb_inval = graphics_desc->has_range_tlb_inval; xe->info.has_usm = graphics_desc->has_usm; xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 3189bd95bb6e..a4451bdc79fb 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -39,6 +39,7 @@ struct xe_device_desc { u8 has_display:1; u8 has_fan_control:1; + u8 has_flat_ccs:1; u8 has_gsc_nvm:1; u8 has_heci_gscfi:1; u8 has_heci_cscfi:1; @@ -59,7 +60,6 @@ struct xe_graphics_desc { u8 has_asid:1; u8 has_atomic_enable_pte_bit:1; - u8 has_flat_ccs:1; u8 has_indirect_ring_state:1; u8 has_range_tlb_inval:1; u8 has_usm:1; -- cgit v1.2.3 From ff1d2b5e3d28a62e79c89d2b2ab28ef5eaab84d8 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:50 -0700 Subject: drm/xe: Read VF GMD_ID with a specifically-allocated dummy GT SRIOV VF initialization has a bit of a chicken and egg design problem. 
Determining the IP version of the graphics and media IPs can't be done via direct register reads as it is on PF or native and instead requires querying the GuC. However initialization of the GT, including its GuC, needs to wait until after we know the IP versions so that the proper initialization steps for the platform/IP are followed. Currently the (somewhat hacky) solution is to manually fill out just enough fields in tile 0's primary GT structure to make it look as if the GT has been initialized so that the GuC can be partially initialized and queried to obtain the GMD_ID values. When the GT gets properly initialized during the regular flows, the hacked-up values will get overwritten as part of the general initialization flows. Rather than using tile 0's primary GT structure to hold the hacked up values for querying every GT on every tile, instead allocate a dedicated dummy structure. This will allow us to move the tile->primary_gt's allocation to a more consistent place later in the initialization flow in future patches (i.e., we shouldn't even allocate this GT structure if the GT is disabled/unavailable). It also helps ensure there can't be any accidental leakage of initialization or state between the dummy initialization for GMD_ID and the real driver initialization of the GT. v2: - Initialize gt->tile for temporary GT. (CI, Michal) - Use scope-based cleanup handler to free temp GT. (Michal) - Propagate actual error code from xe_gt_sriov_vf_bootstrap() rather than just setting IP version to 0.0 now that read_gmdid() can return an error. (Michal) v3: - Explicitly initialize gt to NULL, just in case something else gets inserted before the kzalloc() in the future. (Lucas) Cc: Michal Wajdeczko Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251013200944.2499947-32-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/tests/xe_pci.c | 6 ++-- drivers/gpu/drm/xe/xe_pci.c | 70 +++++++++++++++++++++++---------------- 2 files changed, 45 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 663a79ec960d..f3179b31f13e 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -311,8 +311,8 @@ const void *xe_pci_id_gen_param(struct kunit *test, const void *prev, char *desc } EXPORT_SYMBOL_IF_KUNIT(xe_pci_id_gen_param); -static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, - u32 *ver, u32 *revid) +static int fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, + u32 *ver, u32 *revid) { struct kunit *test = kunit_get_current_test(); struct xe_pci_fake_data *data = test->priv; @@ -324,6 +324,8 @@ static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, *ver = data->graphics_verx100; *revid = xe_step_to_gmdid(data->step.graphics); } + + return 0; } static void fake_xe_info_probe_tile_count(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 3769456a72ad..f54b7963b9ba 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -464,7 +464,7 @@ enum xe_gmdid_type { GMDID_MEDIA }; -static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid) +static int read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid) { struct xe_mmio *mmio = xe_root_tile_mmio(xe); struct xe_reg gmdid_reg = GMD_ID; @@ -473,22 +473,24 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, 
KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid); if (IS_SRIOV_VF(xe)) { - struct xe_gt *gt = xe_root_mmio_gt(xe); - /* * To get the value of the GMDID register, VFs must obtain it * from the GuC using MMIO communication. * - * Note that at this point the xe_gt is not fully uninitialized - * and only basic access to MMIO registers is possible. To use - * our existing GuC communication functions we must perform at - * least basic xe_gt and xe_guc initialization. - * - * Since to obtain the value of GMDID_MEDIA we need to use the - * media GuC, temporarily tweak the gt type. + * Note that at this point the GTs are not initialized and only + * tile-level access to MMIO registers is possible. To use our + * existing GuC communication functions we must create a dummy + * GT structure and perform at least basic xe_gt and xe_guc + * initialization. */ - xe_gt_assert(gt, gt->info.type == XE_GT_TYPE_UNINITIALIZED); + struct xe_gt *gt __free(kfree) = NULL; + int err; + + gt = kzalloc(sizeof(*gt), GFP_KERNEL); + if (!gt) + return -ENOMEM; + gt->tile = &xe->tiles[0]; if (type == GMDID_MEDIA) { gt->info.id = 1; gt->info.type = XE_GT_TYPE_MEDIA; @@ -500,15 +502,11 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, xe_gt_mmio_init(gt); xe_guc_comm_init_early(>->uc.guc); - /* Don't bother with GMDID if failed to negotiate the GuC ABI */ - val = xe_gt_sriov_vf_bootstrap(gt) ? 0 : xe_gt_sriov_vf_gmdid(gt); + err = xe_gt_sriov_vf_bootstrap(gt); + if (err) + return err; - /* - * Only undo xe_gt.info here, the remaining changes made above - * will be overwritten as part of the regular initialization. - */ - gt->info.id = 0; - gt->info.type = XE_GT_TYPE_UNINITIALIZED; + val = xe_gt_sriov_vf_gmdid(gt); } else { /* * GMD_ID is a GT register, but at this point in the driver @@ -526,6 +524,8 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, *ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + REG_FIELD_GET(GMD_ID_RELEASE_MASK, val); *revid = REG_FIELD_GET(GMD_ID_REVID, val); + + return 0; } static const struct xe_ip *find_graphics_ip(unsigned int verx100) @@ -552,18 +552,21 @@ static const struct xe_ip *find_media_ip(unsigned int verx100) * Read IP version from hardware and select graphics/media IP descriptors * based on the result. 
*/ -static void handle_gmdid(struct xe_device *xe, - const struct xe_ip **graphics_ip, - const struct xe_ip **media_ip, - u32 *graphics_revid, - u32 *media_revid) +static int handle_gmdid(struct xe_device *xe, + const struct xe_ip **graphics_ip, + const struct xe_ip **media_ip, + u32 *graphics_revid, + u32 *media_revid) { u32 ver; + int ret; *graphics_ip = NULL; *media_ip = NULL; - read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid); + ret = read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid); + if (ret) + return ret; *graphics_ip = find_graphics_ip(ver); if (!*graphics_ip) { @@ -571,16 +574,21 @@ static void handle_gmdid(struct xe_device *xe, ver / 100, ver % 100); } - read_gmdid(xe, GMDID_MEDIA, &ver, media_revid); + ret = read_gmdid(xe, GMDID_MEDIA, &ver, media_revid); + if (ret) + return ret; + /* Media may legitimately be fused off / not present */ if (ver == 0) - return; + return 0; *media_ip = find_media_ip(ver); if (!*media_ip) { drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n", ver / 100, ver % 100); } + + return 0; } /* @@ -691,6 +699,7 @@ static int xe_info_init(struct xe_device *xe, const struct xe_media_desc *media_desc; struct xe_tile *tile; struct xe_gt *gt; + int ret; u8 id; /* @@ -706,8 +715,11 @@ static int xe_info_init(struct xe_device *xe, xe->info.step = xe_step_pre_gmdid_get(xe); } else { xe_assert(xe, !desc->pre_gmdid_media_ip); - handle_gmdid(xe, &graphics_ip, &media_ip, - &graphics_gmdid_revid, &media_gmdid_revid); + ret = handle_gmdid(xe, &graphics_ip, &media_ip, + &graphics_gmdid_revid, &media_gmdid_revid); + if (ret) + return ret; + xe->info.step = xe_step_gmdid_get(xe, graphics_gmdid_revid, media_gmdid_revid); -- cgit v1.2.3 From 9c52402f6bd0b82f150236bb9b5895af00bb1188 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:51 -0700 Subject: drm/xe: Move primary GT allocation from xe_tile_init_early to xe_tile_init During the early days of the Xe driver, there were cases where we accessed some fields in the primary GT's xe_gt structure before the GT itself was formally initialized; this required that the structure itself be allocated during xe_tile_init_early(). A lot of refactoring of the device probe has happened since that time and there's no longer a need to allocate the primary GT early. Move the allocation into xe_info_init() where GT initialization happens and where we're doing the allocation of the media GT. v2: - Only make this change after a separate patch to perform VF GMD_ID lookup with a dummy GT instead of xe_root_mmio_gt(). 
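Abbreviated, the per-tile loop in xe_info_init() now starts with the allocation, per the diff below:

for_each_tile(tile, xe, id) {
	tile->primary_gt = xe_gt_alloc(tile);
	if (IS_ERR(tile->primary_gt))
		return PTR_ERR(tile->primary_gt);

	gt = tile->primary_gt;
	/* ...existing GT info setup continues... */
}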
Cc: Michal Wajdeczko Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251013200944.2499947-33-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_pci.c | 4 ++++ drivers/gpu/drm/xe/xe_tile.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f54b7963b9ba..0fea2ddb3c99 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -774,6 +774,10 @@ static int xe_info_init(struct xe_device *xe, for_each_tile(tile, xe, id) { int err; + tile->primary_gt = xe_gt_alloc(tile); + if (IS_ERR(tile->primary_gt)) + return PTR_ERR(tile->primary_gt); + gt = tile->primary_gt; gt->info.type = XE_GT_TYPE_MAIN; gt->info.id = tile->id * xe->info.max_gt_per_tile; diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 8fd6ee990d14..4f4f9a5c43af 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -157,10 +157,6 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id) if (err) return err; - tile->primary_gt = xe_gt_alloc(tile); - if (IS_ERR(tile->primary_gt)) - return PTR_ERR(tile->primary_gt); - xe_pcode_init(tile); return 0; -- cgit v1.2.3 From 082547d8b401f8eef076d18344c267fe1bddb055 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:52 -0700 Subject: drm/xe: Skip L2 / TDF cache flushes if primary GT is disabled If the primary GT is disabled via configfs, GT-side L2 and TD cache flushes are unnecessary since nothing is using/filling these caches. Reviewed-by: Tejas Upadhyay Link: https://lore.kernel.org/r/20251013200944.2499947-34-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_device.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 6f8f72fd1b13..e2aa79a78938 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -1062,6 +1062,8 @@ void xe_device_l2_flush(struct xe_device *xe) unsigned int fw_ref; gt = xe_root_mmio_gt(xe); + if (!gt) + return; if (!XE_GT_WA(gt, 16023588340)) return; @@ -1107,6 +1109,9 @@ void xe_device_td_flush(struct xe_device *xe) return; root_gt = xe_root_mmio_gt(xe); + if (!root_gt) + return; + if (XE_GT_WA(root_gt, 16023588340)) { /* A transient flush is not sufficient: flush the L2 */ xe_device_l2_flush(xe); -- cgit v1.2.3 From 090e7fc422a5810b0267d2e83859e05b53007563 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:53 -0700 Subject: drm/xe/query: Report hwconfig size as 0 if primary GT is disabled The hwconfig table is part of the primary GT's GuC firmware. If the primary GT is disabled, the hwconfig is unavailable and should be reported to userspace as having size 0. Reviewed-by: Tejas Upadhyay Link: https://lore.kernel.org/r/20251013200944.2499947-35-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 2e9ff33ed2fe..1c0915e2cc16 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -436,7 +436,7 @@ static int query_hwconfig(struct xe_device *xe, struct drm_xe_device_query *query) { struct xe_gt *gt = xe_root_mmio_gt(xe); - size_t size = xe_guc_hwconfig_size(>->uc.guc); + size_t size = gt ? 
xe_guc_hwconfig_size(&gt->uc.guc) : 0; void __user *query_ptr = u64_to_user_ptr(query->data); void *hwconfig; -- cgit v1.2.3 From 999ef874c1458e54ce221fcf2674eeb7c29508d5 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:54 -0700 Subject: drm/xe/pmu: Initialize PMU event types based on first available GT GT ID#0 (primary GT on tile 0) may not always be available if the primary GT has been disabled via configfs. Instead use the first available GT when determining which PMU events are supported. If there are no GTs, then don't advertise any GT-related events. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251013200944.2499947-36-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_pmu.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index cab51d826345..c63335eb69e5 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -497,7 +497,12 @@ static const struct attribute_group *pmu_events_attr_update[] = { static void set_supported_events(struct xe_pmu *pmu) { struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); - struct xe_gt *gt = xe_device_get_gt(xe, 0); + struct xe_gt *gt; + int id; + + /* If there are no GTs, don't support any GT-related events */ + if (xe->info.gt_count == 0) + return; if (!xe->info.skip_guc_pc) { pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY); @@ -505,6 +510,10 @@ static void set_supported_events(struct xe_pmu *pmu) pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_REQUESTED_FREQUENCY); } + /* Find the first available GT to query engine event capabilities */ + for_each_gt(gt, xe, id) + break; + if (xe_guc_engine_activity_supported(&gt->uc.guc)) { pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_ACTIVE_TICKS); pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_TOTAL_TICKS); -- cgit v1.2.3 From d0ff153cca85520a2a14ee4edf44f6183b2e0e74 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:55 -0700 Subject: drm/xe: Check for primary GT before looking up Wa_22019338487 If the primary GT is disabled via configfs, we need to make sure that we don't search for this workaround on a NULL xe_gt pointer. Since we can disable the primary GT only on igpu platforms, the media GT is the one we'd want to check anyway for this workaround. The ternary operators in ggtt_update_access_counter() were getting a bit long/complicated, so rewrite them with regular if/else statements. While we're at it, throw in a couple extra assertions to make sure that we're truly picking the expected GT according to igpu/dgpu type. v2: - Adjust indentation/wrapping; it's easier to read this with longer, unwrapped lines. (Lucas) - Tweak wording of commit message to remove ambiguity.
(Gustavo) Cc: Gustavo Sousa Reviewed-by: Lucas De Marchi Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20251013200944.2499947-37-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_ggtt.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index aca7ae5489b9..40680f0c49a1 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -107,10 +107,23 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev) static void ggtt_update_access_counter(struct xe_ggtt *ggtt) { struct xe_tile *tile = ggtt->tile; - struct xe_gt *affected_gt = XE_GT_WA(tile->primary_gt, 22019338487) ? - tile->primary_gt : tile->media_gt; - struct xe_mmio *mmio = &affected_gt->mmio; - u32 max_gtt_writes = XE_GT_WA(ggtt->tile->primary_gt, 22019338487) ? 1100 : 63; + struct xe_gt *affected_gt; + u32 max_gtt_writes; + + if (tile->primary_gt && XE_GT_WA(tile->primary_gt, 22019338487)) { + affected_gt = tile->primary_gt; + max_gtt_writes = 1100; + + /* Only expected to apply to primary GT on dgpu platforms */ + xe_tile_assert(tile, IS_DGFX(tile_to_xe(tile))); + } else { + affected_gt = tile->media_gt; + max_gtt_writes = 63; + + /* Only expected to apply to media GT on igpu platforms */ + xe_tile_assert(tile, !IS_DGFX(tile_to_xe(tile))); + } + /* * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit * to wait for completion of prior GTT writes before letting this through. @@ -119,7 +132,7 @@ static void ggtt_update_access_counter(struct xe_ggtt *ggtt) lockdep_assert_held(&ggtt->lock); if ((++ggtt->access_count % max_gtt_writes) == 0) { - xe_mmio_write32(mmio, GMD_ID, 0x0); + xe_mmio_write32(&affected_gt->mmio, GMD_ID, 0x0); ggtt->access_count = 0; } } @@ -291,10 +304,10 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) ggtt->size = GUC_GGTT_TOP; if (GRAPHICS_VERx100(xe) >= 1270) - ggtt->pt_ops = (ggtt->tile->media_gt && - XE_GT_WA(ggtt->tile->media_gt, 22019338487)) || - XE_GT_WA(ggtt->tile->primary_gt, 22019338487) ? - &xelpg_pt_wa_ops : &xelpg_pt_ops; + ggtt->pt_ops = + (ggtt->tile->media_gt && XE_GT_WA(ggtt->tile->media_gt, 22019338487)) || + (ggtt->tile->primary_gt && XE_GT_WA(ggtt->tile->primary_gt, 22019338487)) ? + &xelpg_pt_wa_ops : &xelpg_pt_ops; else ggtt->pt_ops = &xelp_pt_ops; -- cgit v1.2.3 From 886e5b6e5c96be7202bff2f8032157bd172e0927 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:56 -0700 Subject: drm/xe: Make display part of Wa_22019338487 a device workaround The display part of Wa_22019338487 (i.e., avoiding use of stolen memory) is using a platform test rather than a graphics/media IP test. Since this workaround is focused on non-GT uses of stolen memory, it makes sense that we'd want to still apply the workaround on affected platforms even if the GTs themselves are disabled via configfs.
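A condensed before/after sketch of the display-side check (taken from the diffs below); the device-scoped lookup stays valid even when every GT is disabled:

	/* before: GT-scoped lookup, requires a valid root GT */
	if (XE_GT_WA(xe_root_mmio_gt(xe), 22019338487_display))
		...

	/* after: device-scoped lookup, independent of GT state */
	if (XE_DEVICE_WA(xe, 22019338487_display))
		...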
Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20251013200944.2499947-38-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 4 ++-- drivers/gpu/drm/xe/display/xe_plane_initial.c | 4 ++-- drivers/gpu/drm/xe/xe_device_wa_oob.rules | 1 + drivers/gpu/drm/xe/xe_wa_oob.rules | 1 - 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index 8ea9a472113c..af8139d00161 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -13,7 +13,7 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_wa.h" -#include <generated/xe_wa_oob.h> +#include <generated/xe_device_wa_oob.h> struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) @@ -41,7 +41,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, size = PAGE_ALIGN(size); obj = ERR_PTR(-ENODEV); - if (!IS_DGFX(xe) && !XE_GT_WA(xe_root_mmio_gt(xe), 22019338487_display)) { + if (!IS_DGFX(xe) && !XE_DEVICE_WA(xe, 22019338487_display)) { obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), size, ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 94f00def811b..12d25c5290fd 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -25,7 +25,7 @@ #include "xe_vram_types.h" #include "xe_wa.h" -#include <generated/xe_wa_oob.h> +#include <generated/xe_device_wa_oob.h> void intel_plane_initial_vblank_wait(struct intel_crtc *crtc) { @@ -123,7 +123,7 @@ initial_plane_bo(struct xe_device *xe, phys_base = base; flags |= XE_BO_FLAG_STOLEN; - if (XE_GT_WA(xe_root_mmio_gt(xe), 22019338487_display)) + if (XE_DEVICE_WA(xe, 22019338487_display)) return NULL; /* diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules index 3a0c4ccc4224..3cc93f0e77f8 100644 --- a/drivers/gpu/drm/xe/xe_device_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules @@ -1,2 +1,3 @@ 15015404425 PLATFORM(LUNARLAKE) PLATFORM(PANTHERLAKE) +22019338487_display PLATFORM(LUNARLAKE) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index f3a6d5d239ce..eb761d30e066 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -45,7 +45,6 @@ 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) -22019338487_display PLATFORM(LUNARLAKE) 16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) 14019789679 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 2004) -- cgit v1.2.3 From 4d292406823b505bd14bd7e02d2cae7f5fb0a97a Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:57 -0700 Subject: drm/xe/irq: Don't try to lookup engine masks for non-existent primary GT If the primary GT is disabled via configfs, we shouldn't try to access it to look up BCS/CCS engine masks. For the purposes of IRQ reset (which masks & disables interrupts in an sgunit register), assume all possible instances are present.
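In outline (condensed from the diff below), the engine masks now default to all-ones and are narrowed only when a primary GT actually exists:

	u32 ccs_mask = ~0;	/* assume all possible instances */
	u32 bcs_mask = ~0;

	if (tile->primary_gt) {
		ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
						       XE_ENGINE_CLASS_COMPUTE);
		bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
						       XE_ENGINE_CLASS_COPY);
	}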
Reviewed-by: Tejas Upadhyay Link: https://lore.kernel.org/r/20251013200944.2499947-39-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_irq.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 9b938f1edaf5..838fb512b777 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -494,11 +494,15 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) static void gt_irq_reset(struct xe_tile *tile) { struct xe_mmio *mmio = &tile->mmio; - - u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, - XE_ENGINE_CLASS_COMPUTE); - u32 bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, - XE_ENGINE_CLASS_COPY); + u32 ccs_mask = ~0; + u32 bcs_mask = ~0; + + if (tile->primary_gt) { + ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, + XE_ENGINE_CLASS_COMPUTE); + bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, + XE_ENGINE_CLASS_COPY); + } /* Disable RCS, BCS, VCS and VECS class engines. */ xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, 0); -- cgit v1.2.3 From 78de8f87668334a3bfadad52e5142fc19dad1807 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:58 -0700 Subject: drm/xe: Handle Wa_22010954014 and Wa_14022085890 as device workarounds When Wa_22010954014 and Wa_14022085890 were first implemented, we didn't have a device workaround infrastructure so we hacked them into the GT workaround list. Now that we have proper device workaround support, move them to the proper place. Note that Wa_14022085890 specifically applies to BMG-G21 platforms, so this requires defining a BMG subplatform to capture the correct subset of device IDs. Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20251013200944.2499947-40-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_device_wa_oob.rules | 2 ++ drivers/gpu/drm/xe/xe_guc_pc.c | 3 ++- drivers/gpu/drm/xe/xe_pci.c | 6 ++++++ drivers/gpu/drm/xe/xe_platform_types.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 2 +- drivers/gpu/drm/xe/xe_wa_oob.rules | 5 ----- include/drm/intel/pciids.h | 7 +++++-- 7 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules index 3cc93f0e77f8..55ba01bc8f38 100644 --- a/drivers/gpu/drm/xe/xe_device_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules @@ -1,3 +1,5 @@ +22010954014 PLATFORM(DG2) 15015404425 PLATFORM(LUNARLAKE) PLATFORM(PANTHERLAKE) 22019338487_display PLATFORM(LUNARLAKE) +14022085890 SUBPLATFORM(BATTLEMAGE, G21) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 3c0feb50a1e2..ff22235857f8 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -14,6 +14,7 @@ #include #include +#include #include #include "abi/guc_actions_slpc_abi.h" @@ -886,7 +887,7 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) if (pc_get_min_freq(pc) > pc->rp0_freq) ret = pc_set_min_freq(pc, pc->rp0_freq); - if (XE_GT_WA(tile->primary_gt, 14022085890)) + if (XE_DEVICE_WA(tile_to_xe(tile), 14022085890)) ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc))); out: diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 0fea2ddb3c99..368181b4d0b3 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -335,6 +335,8 @@ static const struct xe_device_desc lnl_desc = { .vm_max_level = 4, }; +static const u16 bmg_g21_ids[] = { 
INTEL_BMG_G21_IDS(NOP), 0 }; + static const struct xe_device_desc bmg_desc = { DGFX_FEATURES, PLATFORM(BATTLEMAGE), @@ -349,6 +351,10 @@ static const struct xe_device_desc bmg_desc = { .has_sriov = true, .max_gt_per_tile = 2, .needs_scratch = true, + .subplatforms = (const struct xe_subplatform_desc[]) { + { XE_SUBPLATFORM_BATTLEMAGE_G21, "G21", bmg_g21_ids }, + { } + }, .va_bits = 48, .vm_max_level = 4, }; diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index d08574c4cdb8..3e332214c7bb 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -34,6 +34,7 @@ enum xe_subplatform { XE_SUBPLATFORM_DG2_G10, XE_SUBPLATFORM_DG2_G11, XE_SUBPLATFORM_DG2_G12, + XE_SUBPLATFORM_BATTLEMAGE_G21, }; #endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index c60159a13001..aa1b69f48f6f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -1138,6 +1138,6 @@ void xe_wa_apply_tile_workarounds(struct xe_tile *tile) if (IS_SRIOV_VF(tile->xe)) return; - if (XE_GT_WA(tile->primary_gt, 22010954014)) + if (XE_DEVICE_WA(tile->xe, 22010954014)) xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS); } diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index eb761d30e066..113a62f1b541 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -14,7 +14,6 @@ 14016763929 SUBPLATFORM(DG2, G10) SUBPLATFORM(DG2, G12) 16017236439 PLATFORM(PVC) -22010954014 PLATFORM(DG2) 14019821291 MEDIA_VERSION_RANGE(1300, 2000) 14015076503 MEDIA_VERSION(1300) 16020292621 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) @@ -74,9 +73,5 @@ 16023683509 MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_psmi_enabled) -# SoC workaround - currently applies to all platforms with the following -# primary GT GMDID -14022085890 GRAPHICS_VERSION(2001) - 15015404425_disable PLATFORM(PANTHERLAKE), MEDIA_STEP(B0, FOREVER) 16026007364 MEDIA_VERSION(3000) diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index da6301a6fcea..0cd12616d621 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -849,7 +849,7 @@ MACRO__(0x64B0, ## __VA_ARGS__) /* BMG */ -#define INTEL_BMG_IDS(MACRO__, ...) \ +#define INTEL_BMG_G21_IDS(MACRO__, ...) \ MACRO__(0xE202, ## __VA_ARGS__), \ MACRO__(0xE209, ## __VA_ARGS__), \ MACRO__(0xE20B, ## __VA_ARGS__), \ @@ -858,7 +858,10 @@ MACRO__(0xE210, ## __VA_ARGS__), \ MACRO__(0xE211, ## __VA_ARGS__), \ MACRO__(0xE212, ## __VA_ARGS__), \ - MACRO__(0xE216, ## __VA_ARGS__), \ + MACRO__(0xE216, ## __VA_ARGS__) + +#define INTEL_BMG_IDS(MACRO__, ...) \ + INTEL_BMG_G21_IDS(MACRO__, __VA_ARGS__), \ MACRO__(0xE220, ## __VA_ARGS__), \ MACRO__(0xE221, ## __VA_ARGS__), \ MACRO__(0xE222, ## __VA_ARGS__), \ -- cgit v1.2.3 From 794e735cb6d52713b382d87a8c14678e15d3e7f8 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:09:59 -0700 Subject: drm/xe/rtp: Pass xe_device parameter to FUNC matches FUNC matches in RTP only pass the GT and hwe, preventing them from being used effectively in device workarounds. Add an additional xe_device parameter so that we can use them in device workarounds where a GT may not be available. 
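The change to the match-function contract is mechanical; every FUNC match gains a leading device pointer (a sketch of the function-pointer type from the diff below; xe is always valid, while gt may now be NULL for device workarounds):

	/* old */
	bool (*match_func)(const struct xe_gt *gt,
			   const struct xe_hw_engine *hwe);

	/* new */
	bool (*match_func)(const struct xe_device *xe,
			   const struct xe_gt *gt,
			   const struct xe_hw_engine *hwe);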
Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20251013200944.2499947-41-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 6 ++++-- drivers/gpu/drm/xe/xe_hw_engine.c | 10 ++++++---- drivers/gpu/drm/xe/xe_reg_whitelist.c | 3 ++- drivers/gpu/drm/xe/xe_rtp.c | 24 +++++++++++++----------- drivers/gpu/drm/xe/xe_rtp.h | 18 +++++++++++++----- drivers/gpu/drm/xe/xe_rtp_types.h | 4 +++- 6 files changed, 41 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index b0254b014fe4..d2255a59e58f 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -48,12 +48,14 @@ struct rtp_test_case { const struct xe_rtp_entry *entries; }; -static bool match_yes(const struct xe_gt *gt, const struct xe_hw_engine *hwe) +static bool match_yes(const struct xe_device *xe, const struct xe_gt *gt, + const struct xe_hw_engine *hwe) { return true; } -static bool match_no(const struct xe_gt *gt, const struct xe_hw_engine *hwe) +static bool match_no(const struct xe_device *xe, const struct xe_gt *gt, + const struct xe_hw_engine *hwe) { return false; } diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 1cf623b4a5bc..cba4375525c7 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -346,17 +346,19 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); } -static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, +static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { return xe_gt_ccs_mode_enabled(gt) && - xe_rtp_match_first_render_or_compute(gt, hwe); + xe_rtp_match_first_render_or_compute(xe, gt, hwe); } -static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt, +static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { - if (GRAPHICS_VER(gt_to_xe(gt)) < 20) + if (GRAPHICS_VER(xe) < 20) return false; if (hwe->class != XE_ENGINE_CLASS_COMPUTE && diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 23f6c81d9994..690bc327a363 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -19,7 +19,8 @@ #undef XE_REG_MCR #define XE_REG_MCR(...) 
XE_REG(__VA_ARGS__, .mcr = 1) -static bool match_not_render(const struct xe_gt *gt, +static bool match_not_render(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { return hwe->class != XE_ENGINE_CLASS_RENDER; diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index b5f430d59f80..66707cc89ec9 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -133,10 +133,7 @@ static bool rule_matches(const struct xe_device *xe, match = hwe->class != r->engine_class; break; case XE_RTP_MATCH_FUNC: - if (drm_WARN_ON(&xe->drm, !gt)) - return false; - - match = r->match_func(gt, hwe); + match = r->match_func(xe, gt, hwe); break; default: drm_warn(&xe->drm, "Invalid RTP match %u\n", @@ -343,13 +340,15 @@ void xe_rtp_process(struct xe_rtp_process_ctx *ctx, } EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process); -bool xe_rtp_match_even_instance(const struct xe_gt *gt, +bool xe_rtp_match_even_instance(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { return hwe->instance % 2 == 0; } -bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, +bool xe_rtp_match_first_render_or_compute(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { u64 render_compute_mask = gt->info.engine_mask & @@ -359,19 +358,22 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, hwe->engine_id == __ffs(render_compute_mask); } -bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, +bool xe_rtp_match_not_sriov_vf(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { - return !IS_SRIOV_VF(gt_to_xe(gt)); + return !IS_SRIOV_VF(xe); } -bool xe_rtp_match_psmi_enabled(const struct xe_gt *gt, +bool xe_rtp_match_psmi_enabled(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { - return xe_configfs_get_psmi_enabled(to_pci_dev(gt_to_xe(gt)->drm.dev)); + return xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev)); } -bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_gt *gt, +bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { return xe_gt_has_discontiguous_dss_groups(gt); diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index ac12ddf6cde6..e5b8a9452e29 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -440,18 +440,21 @@ void xe_rtp_process(struct xe_rtp_process_ctx *ctx, /** * xe_rtp_match_even_instance - Match if engine instance is even + * @xe: Device structure * @gt: GT structure * @hwe: Engine instance * * Returns: true if engine instance is even, false otherwise */ -bool xe_rtp_match_even_instance(const struct xe_gt *gt, +bool xe_rtp_match_even_instance(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe); /* * xe_rtp_match_first_render_or_compute - Match if it's first render or compute * engine in the GT * + * @xe: Device structure * @gt: GT structure * @hwe: Engine instance * @@ -463,24 +466,29 @@ bool xe_rtp_match_even_instance(const struct xe_gt *gt, * Returns: true if engine id is the first to match the render reset domain, * false otherwise. 
*/ -bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, +bool xe_rtp_match_first_render_or_compute(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe); /* * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device * + * @xe: Device structure * @gt: GT structure * @hwe: Engine instance * * Returns: true if device is not VF, false otherwise. */ -bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, +bool xe_rtp_match_not_sriov_vf(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe); -bool xe_rtp_match_psmi_enabled(const struct xe_gt *gt, +bool xe_rtp_match_psmi_enabled(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe); -bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_gt *gt, +bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe); #endif diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index f4cf30e298cf..6ba7f226c227 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -10,6 +10,7 @@ #include "regs/xe_reg_defs.h" +struct xe_device; struct xe_hw_engine; struct xe_gt; @@ -86,7 +87,8 @@ struct xe_rtp_rule { u8 engine_class; }; /* MATCH_FUNC */ - bool (*match_func)(const struct xe_gt *gt, + bool (*match_func)(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe); }; }; -- cgit v1.2.3 From 4f3ecdb6ea2cde339148c7b200e50ef061bea0e2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:10:00 -0700 Subject: drm/xe: Bypass Wa_14018094691 when primary GT is disabled Don't try to lookup Wa_14018094691 on a NULL GT when the primary GT is disabled. Since this whole workaround centers around mid-thread preemption behavior, the workaround isn't relevant if the primary GT (where the engines that can do MTP live) is disabled. Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20251013200944.2499947-42-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_gsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 83d61bf8ec62..dd69cb834f8e 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -266,7 +266,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) unsigned int fw_ref; int ret; - if (XE_GT_WA(tile->primary_gt, 14018094691)) { + if (tile->primary_gt && XE_GT_WA(tile->primary_gt, 14018094691)) { fw_ref = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); /* @@ -281,7 +281,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) ret = gsc_upload(gsc); - if (XE_GT_WA(tile->primary_gt, 14018094691)) + if (tile->primary_gt && XE_GT_WA(tile->primary_gt, 14018094691)) xe_force_wake_put(gt_to_fw(tile->primary_gt), fw_ref); if (ret) -- cgit v1.2.3 From 6a913fc86a847f011b22f5d8f8816aabaa48437c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:10:01 -0700 Subject: drm/xe: Correct lineage for Wa_22014953428 and only check with valid GT Wa_22014953428 was incorrectly labelled with a release-specific ID number rather than the cross-platform lineage number; fix that. Also check that the GT is not NULL before trying to lookup the workaround in it. Since this workaround only applies to DG2 discrete GPUs (where the primary GT cannot be disabled), no coverage is lost. 
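The resulting pattern (condensed from the xe_vm.c diff below) caches the root GT once and short-circuits the workaround lookup when it is NULL:

	struct xe_gt *wa_gt = xe_root_mmio_gt(xe);

	if (wa_gt && XE_GT_WA(wa_gt, 22014953428))
		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;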
Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20251013200944.2499947-43-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_vm.c | 3 ++- drivers/gpu/drm/xe/xe_wa_oob.rules | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 179758ca7cb8..08bc55bd91d7 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1910,6 +1910,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_vm_create *args = data; + struct xe_gt *wa_gt = xe_root_mmio_gt(xe); struct xe_vm *vm; u32 id; int err; @@ -1918,7 +1919,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; - if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929)) + if (wa_gt && XE_GT_WA(wa_gt, 22014953428)) args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 113a62f1b541..4bb94e5799ed 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -11,7 +11,7 @@ 18020744125 PLATFORM(PVC) 1509372804 PLATFORM(PVC), GRAPHICS_STEP(A0, C0) 1409600907 GRAPHICS_VERSION_RANGE(1200, 1250) -14016763929 SUBPLATFORM(DG2, G10) +22014953428 SUBPLATFORM(DG2, G10) SUBPLATFORM(DG2, G12) 16017236439 PLATFORM(PVC) 14019821291 MEDIA_VERSION_RANGE(1300, 2000) -- cgit v1.2.3 From 5dfc8989694001aff99348eef3c69c3294ea8e09 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:10:02 -0700 Subject: drm/xe: Check that GT is not NULL before testing Wa_16023588340 If the primary GT is disabled, skip the check for this workaround (which only applies to dgpu platforms where the primary GT cannot be NULL). Reviewed-by: Tejas Upadhyay Link: https://lore.kernel.org/r/20251013200944.2499947-44-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/display/xe_display_wa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/display/xe_display_wa.c b/drivers/gpu/drm/xe/display/xe_display_wa.c index 8ada1cbcb16c..2aa1b8c03411 100644 --- a/drivers/gpu/drm/xe/display/xe_display_wa.c +++ b/drivers/gpu/drm/xe/display/xe_display_wa.c @@ -13,6 +13,7 @@ bool intel_display_needs_wa_16023588340(struct intel_display *display) { struct xe_device *xe = to_xe_device(display->drm); + struct xe_gt *wa_gt = xe_root_mmio_gt(xe); - return XE_GT_WA(xe_root_mmio_gt(xe), 16023588340); + return wa_gt && XE_GT_WA(wa_gt, 16023588340); } -- cgit v1.2.3 From ad0084f3f8ba1891b1e59b9606eae8c2474a1496 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:10:03 -0700 Subject: drm/xe: Don't check BIOS-disabled FlatCCS if primary GT is disabled If the primary GT is disabled via configfs, we can't read the GT registers that would tell us whether the BIOS has disabled FlatCCS on a platform that would otherwise have it; we'll just proceed as if FlatCCS is still enabled. This is similar to the situation seen by SRIOV VFs and doesn't cause any functional problems since the hardware will simply drop writes to the CCS region and reads will always come back as 0 (indicating uncompressed data). We'll simply miss out on the chance to avoid some unnecessary overhead during BO creation and migration.
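The fix itself is a simple early-out in probe_has_flat_ccs() (condensed from the diff below), which leaves the device's FlatCCS setting at its platform default:

	gt = xe_root_mmio_gt(xe);
	if (!gt)
		return 0;	/* no primary GT: keep assuming FlatCCS is enabled */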
Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20251013200944.2499947-45-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index e2aa79a78938..5f6a412b571c 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -782,6 +782,8 @@ static int probe_has_flat_ccs(struct xe_device *xe) return 0; gt = xe_root_mmio_gt(xe); + if (!gt) + return 0; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) -- cgit v1.2.3 From 6675c9e209155fb7a454325760cb5bc5111671f4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:10:04 -0700 Subject: drm/xe: Break GT setup out of xe_info_init() xe_info_init() is getting a bit long and hard to follow. Break the allocation and basic initialization of the xe_gt structures out to their own functions. v2: - Rename new functions from init_* to alloc_*. (Gustavo) - Move early NULL return of media GT before allocation. (Gustavo) Cc: Gustavo Sousa Reviewed-by: Lucas De Marchi Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20251013200944.2499947-46-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_pci.c | 84 ++++++++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 368181b4d0b3..dee06f115751 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -689,6 +689,53 @@ static void xe_info_probe_tile_count(struct xe_device *xe) } } +static struct xe_gt *alloc_primary_gt(struct xe_tile *tile, + const struct xe_graphics_desc *graphics_desc, + const struct xe_media_desc *media_desc) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_gt *gt; + + gt = xe_gt_alloc(tile); + if (IS_ERR(gt)) + return gt; + + gt->info.type = XE_GT_TYPE_MAIN; + gt->info.id = tile->id * xe->info.max_gt_per_tile; + gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; + gt->info.engine_mask = graphics_desc->hw_engine_mask; + + /* + * Before media version 13, the media IP was part of the primary GT + * so we need to add the media engines to the primary GT's engine list. + */ + if (MEDIA_VER(xe) < 13 && media_desc) + gt->info.engine_mask |= media_desc->hw_engine_mask; + + return gt; +} + +static struct xe_gt *alloc_media_gt(struct xe_tile *tile, + const struct xe_media_desc *media_desc) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_gt *gt; + + if (MEDIA_VER(xe) < 13 || !media_desc) + return NULL; + + gt = xe_gt_alloc(tile); + if (IS_ERR(gt)) + return gt; + + gt->info.type = XE_GT_TYPE_MEDIA; + gt->info.id = tile->id * xe->info.max_gt_per_tile + 1; + gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; + gt->info.engine_mask = media_desc->hw_engine_mask; + + return gt; +} + /* * Initialize device info content that does require knowledge about * graphics / media IP version. @@ -771,48 +818,21 @@ static int xe_info_init(struct xe_device *xe, return err; } - /* - * All platforms have at least one primary GT. Any platform with media - * version 13 or higher has an additional dedicated media GT. And - * depending on the graphics IP there may be additional "remote tiles." - * All of these together determine the overall GT count. - */ + /* Allocate any GT and VRAM structures necessary for the platform. 
*/ for_each_tile(tile, xe, id) { int err; - tile->primary_gt = xe_gt_alloc(tile); - if (IS_ERR(tile->primary_gt)) - return PTR_ERR(tile->primary_gt); - - gt = tile->primary_gt; - gt->info.type = XE_GT_TYPE_MAIN; - gt->info.id = tile->id * xe->info.max_gt_per_tile; - gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; - gt->info.engine_mask = graphics_desc->hw_engine_mask; - err = xe_tile_alloc_vram(tile); if (err) return err; - if (MEDIA_VER(xe) < 13 && media_desc) - gt->info.engine_mask |= media_desc->hw_engine_mask; - - if (MEDIA_VER(xe) < 13 || !media_desc) - continue; + tile->primary_gt = alloc_primary_gt(tile, graphics_desc, media_desc); + if (IS_ERR(tile->primary_gt)) + return PTR_ERR(tile->primary_gt); - /* - * Allocate and setup media GT for platforms with standalone - * media. - */ - tile->media_gt = xe_gt_alloc(tile); + tile->media_gt = alloc_media_gt(tile, media_desc); if (IS_ERR(tile->media_gt)) return PTR_ERR(tile->media_gt); - - gt = tile->media_gt; - gt->info.type = XE_GT_TYPE_MEDIA; - gt->info.id = tile->id * xe->info.max_gt_per_tile + 1; - gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; - gt->info.engine_mask = media_desc->hw_engine_mask; } /* -- cgit v1.2.3 From 7abd69278bb53ab6b43f9650daba550cc7624858 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:10:05 -0700 Subject: drm/xe/configfs: Add attribute to disable GT types Preventing the driver from initializing GTs of specific type(s) can be useful for debugging and early hardware bringup. Add a configfs attribute to allow this kind of control for debugging. With today's platforms and software design, this configuration setting is only effective for disabling the media GT since the driver currently requires that there always be a primary GT to probe the device. However this might change in the future --- in theory it should be possible (with some additional driver work) to allow an igpu device to come up with only the media GT and no primary GT. Or to allow an igpu device to come up with no GTs at all (for display-only usage). A primary GT will likely always be required on dgpu platforms because we rely on the BCS engines inside the primary GT for various vram operations. v2: - Expand/clarify kerneldoc for configfs attribute. (Gustavo) - Tighten type usage in gt_types[] structure. (Gustavo) - Adjust string parsing/name matching to match exact GT names and not accept partial names. (Gustavo) v3: - Switch to scope-based cleanup in gt_types_allowed_store() to fix a leak if the device is already bound. (Gustavo) - Switch configfs lookup interface to two boolean functions that specify whether primary/media are supported rather than one function that returns a mask. This is simpler to use and understand. v4: - Rename xe_configfs_*_gt_supported to xe_configfs_*_gt_allowed for consistency with configfs interface and other functions. (Gustavo) - Simplify boolean check in xe_configfs_*_gt_allowed. (Michal) - Use xe_info() for message printing. (Michal) - Use guard() instead of scoped_guard(). (Michal) - Make new functions take 'struct pci_dev' for consistency with other configfs lookup functions. 
(Michal) Cc: Gustavo Sousa Cc: Michal Wajdeczko Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20251013200944.2499947-47-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_configfs.c | 143 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_configfs.h | 4 ++ drivers/gpu/drm/xe/xe_pci.c | 23 +++++++ 3 files changed, 170 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index 464a79c2a903..c1419a270fa4 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -15,6 +15,7 @@ #include "instructions/xe_mi_commands.h" #include "xe_configfs.h" +#include "xe_gt_types.h" #include "xe_hw_engine_types.h" #include "xe_module.h" #include "xe_pci_types.h" @@ -57,6 +58,7 @@ * : * └── 0000:03:00.0 * ├── survivability_mode + * ├── gt_types_allowed * ├── engines_allowed * └── enable_psmi * @@ -80,6 +82,44 @@ * * This attribute can only be set before binding to the device. * + * Allowed GT types: + * ----------------- + * + * Allow only specific types of GTs to be detected and initialized by the + * driver. Any combination of GT types can be enabled/disabled, although + * some settings will cause the device to fail to probe. + * + * Writes support both comma- and newline-separated input format. Reads + * will always return one GT type per line. "primary" and "media" are the + * GT type names supported by this interface. + * + * This attribute can only be set before binding to the device. + * + * Examples: + * + * Allow both primary and media GTs to be initialized and used. This matches + * the driver's default behavior:: + * + * # echo 'primary,media' > /sys/kernel/config/xe/0000:03:00.0/gt_types_allowed + * + * Allow only the primary GT of each tile to be initialized and used, + * effectively disabling the media GT if it exists on the platform:: + * + * # echo 'primary' > /sys/kernel/config/xe/0000:03:00.0/gt_types_allowed + * + * Allow only the media GT of each tile to be initialized and used, + * effectively disabling the primary GT. **This configuration will cause + * device probe failure on all current platforms, but may be allowed on + * igpu platforms in the future**:: + * + * # echo 'media' > /sys/kernel/config/xe/0000:03:00.0/gt_types_allowed + * + * Disable all GTs. Only other GPU IP (such as display) is potentially usable. 
+ * **This configuration will cause device probe failure on all current + * platforms, but may be allowed on igpu platforms in the future**:: + * + * # echo '' > /sys/kernel/config/xe/0000:03:00.0/gt_types_allowed + * * Allowed engines: * ---------------- * @@ -215,6 +255,7 @@ struct xe_config_group_device { struct config_group sriov; struct xe_config_device { + u64 gt_types_allowed; u64 engines_allowed; struct wa_bb ctx_restore_post_bb[XE_ENGINE_CLASS_MAX]; struct wa_bb ctx_restore_mid_bb[XE_ENGINE_CLASS_MAX]; @@ -234,6 +275,7 @@ struct xe_config_group_device { }; static const struct xe_config_device device_defaults = { + .gt_types_allowed = U64_MAX, .engines_allowed = U64_MAX, .survivability_mode = false, .enable_psmi = false, @@ -259,6 +301,7 @@ struct engine_info { /* Some helpful macros to aid on the sizing of buffer allocation when parsing */ #define MAX_ENGINE_CLASS_CHARS 5 #define MAX_ENGINE_INSTANCE_CHARS 2 +#define MAX_GT_TYPE_CHARS 7 static const struct engine_info engine_info[] = { { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK, .engine_class = XE_ENGINE_CLASS_RENDER }, @@ -269,6 +312,14 @@ static const struct engine_info engine_info[] = { { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK, .engine_class = XE_ENGINE_CLASS_OTHER }, }; +static const struct { + const char name[MAX_GT_TYPE_CHARS + 1]; + enum xe_gt_type type; +} gt_types[] = { + { .name = "primary", .type = XE_GT_TYPE_MAIN }, + { .name = "media", .type = XE_GT_TYPE_MEDIA }, +}; + static struct xe_config_group_device *to_xe_config_group_device(struct config_item *item) { return container_of(to_config_group(item), struct xe_config_group_device, group); @@ -331,6 +382,57 @@ static ssize_t survivability_mode_store(struct config_item *item, const char *pa return len; } +static ssize_t gt_types_allowed_show(struct config_item *item, char *page) +{ + struct xe_config_device *dev = to_xe_config_device(item); + char *p = page; + + for (size_t i = 0; i < ARRAY_SIZE(gt_types); i++) + if (dev->gt_types_allowed & BIT_ULL(gt_types[i].type)) + p += sprintf(p, "%s\n", gt_types[i].name); + + return p - page; +} + +static ssize_t gt_types_allowed_store(struct config_item *item, const char *page, + size_t len) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + char *buf __free(kfree) = kstrdup(page, GFP_KERNEL); + char *p = buf; + u64 typemask = 0; + + if (!buf) + return -ENOMEM; + + while (p) { + char *typename = strsep(&p, ",\n"); + bool matched = false; + + if (typename[0] == '\0') + continue; + + for (size_t i = 0; i < ARRAY_SIZE(gt_types); i++) { + if (strcmp(typename, gt_types[i].name) == 0) { + typemask |= BIT(gt_types[i].type); + matched = true; + break; + } + } + + if (!matched) + return -EINVAL; + } + + guard(mutex)(&dev->lock); + if (is_bound(dev)) + return -EBUSY; + + dev->config.gt_types_allowed = typemask; + + return len; +} + static ssize_t engines_allowed_show(struct config_item *item, char *page) { struct xe_config_device *dev = to_xe_config_device(item); @@ -711,6 +813,7 @@ CONFIGFS_ATTR(, ctx_restore_mid_bb); CONFIGFS_ATTR(, ctx_restore_post_bb); CONFIGFS_ATTR(, enable_psmi); CONFIGFS_ATTR(, engines_allowed); +CONFIGFS_ATTR(, gt_types_allowed); CONFIGFS_ATTR(, survivability_mode); static struct configfs_attribute *xe_config_device_attrs[] = { @@ -718,6 +821,7 @@ static struct configfs_attribute *xe_config_device_attrs[] = { &attr_ctx_restore_post_bb, &attr_enable_psmi, &attr_engines_allowed, + &attr_gt_types_allowed, &attr_survivability_mode, NULL, }; @@ -957,6 +1061,7 @@ static void 
dump_custom_dev_config(struct pci_dev *pdev, dev->config.attr_); \ } while (0) + PRI_CUSTOM_ATTR("%llx", gt_types_allowed); PRI_CUSTOM_ATTR("%llx", engines_allowed); PRI_CUSTOM_ATTR("%d", enable_psmi); PRI_CUSTOM_ATTR("%d", survivability_mode); @@ -1007,6 +1112,44 @@ bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) return mode; } +static u64 get_gt_types_allowed(struct pci_dev *pdev) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + u64 mask; + + if (!dev) + return device_defaults.gt_types_allowed; + + mask = dev->config.gt_types_allowed; + config_group_put(&dev->group); + + return mask; +} + +/** + * xe_configfs_primary_gt_allowed - determine whether primary GTs are supported + * @pdev: pci device + * + * Return: True if primary GTs are enabled, false if they have been disabled via + * configfs. + */ +bool xe_configfs_primary_gt_allowed(struct pci_dev *pdev) +{ + return get_gt_types_allowed(pdev) & BIT_ULL(XE_GT_TYPE_MAIN); +} + +/** + * xe_configfs_media_gt_allowed - determine whether media GTs are supported + * @pdev: pci device + * + * Return: True if the media GTs are enabled, false if they have been disabled + * via configfs. + */ +bool xe_configfs_media_gt_allowed(struct pci_dev *pdev) +{ + return get_gt_types_allowed(pdev) & BIT_ULL(XE_GT_TYPE_MEDIA); +} + /** * xe_configfs_get_engines_allowed - get engine allowed mask from configfs * @pdev: pci device diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h index 16a1f578e4fe..fed57be0b90e 100644 --- a/drivers/gpu/drm/xe/xe_configfs.h +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -17,6 +17,8 @@ int xe_configfs_init(void); void xe_configfs_exit(void); void xe_configfs_check_device(struct pci_dev *pdev); bool xe_configfs_get_survivability_mode(struct pci_dev *pdev); +bool xe_configfs_primary_gt_allowed(struct pci_dev *pdev); +bool xe_configfs_media_gt_allowed(struct pci_dev *pdev); u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev); bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev); u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class, @@ -31,6 +33,8 @@ static inline int xe_configfs_init(void) { return 0; } static inline void xe_configfs_exit(void) { } static inline void xe_configfs_check_device(struct pci_dev *pdev) { } static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; } +static inline bool xe_configfs_primary_gt_allowed(struct pci_dev *pdev) { return true; } +static inline bool xe_configfs_media_gt_allowed(struct pci_dev *pdev) { return true; } static inline u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { return U64_MAX; } static inline bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) { return false; } static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class, diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index dee06f115751..8c12db455eef 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -30,6 +30,7 @@ #include "xe_pci_sriov.h" #include "xe_pci_types.h" #include "xe_pm.h" +#include "xe_printk.h" #include "xe_sriov.h" #include "xe_step.h" #include "xe_survivability_mode.h" @@ -696,6 +697,11 @@ static struct xe_gt *alloc_primary_gt(struct xe_tile *tile, struct xe_device *xe = tile_to_xe(tile); struct xe_gt *gt; + if (!xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev))) { + xe_info(xe, "Primary GT disabled via configfs\n"); + return NULL; + } + gt = xe_gt_alloc(tile); if 
(IS_ERR(gt)) return gt; @@ -721,6 +727,11 @@ static struct xe_gt *alloc_media_gt(struct xe_tile *tile, struct xe_device *xe = tile_to_xe(tile); struct xe_gt *gt; + if (!xe_configfs_media_gt_allowed(to_pci_dev(xe->drm.dev))) { + xe_info(xe, "Media GT disabled via configfs\n"); + return NULL; + } + if (MEDIA_VER(xe) < 13 || !media_desc) return NULL; @@ -830,6 +841,18 @@ static int xe_info_init(struct xe_device *xe, if (IS_ERR(tile->primary_gt)) return PTR_ERR(tile->primary_gt); + + /* + * It's not currently possible to probe a device with the + * primary GT disabled. With some work, this may be possible in + * the future for igpu platforms (although probably not for + * dgpu's since access to the primary GT's BCS engines is + * required for VRAM management). + */ + if (!tile->primary_gt) { + drm_err(&xe->drm, "Cannot probe device without a primary GT\n"); + return -ENODEV; + } + tile->media_gt = alloc_media_gt(tile, media_desc); if (IS_ERR(tile->media_gt)) return PTR_ERR(tile->media_gt); -- cgit v1.2.3 From 961a7582b159586aafee3cf2e81dec1d493d1715 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 13:10:06 -0700 Subject: drm/xe/sriov: Disable SR-IOV if primary GT is disabled via configfs SR-IOV operation relies on the primary GT's GuC to operate (in both PF and VF mode). Don't enable the .has_sriov flag if the primary GT was disabled by configfs. v2: - Move handling to xe_info_init(). (Michal) v3: - Just update the .has_sriov flag in xe_info_init_early(). (Michal) v4: - Drop unnecessary comment. (Michal) - Flip condition order for consistency with other checks. (Michal) Cc: Michal Wajdeczko Reviewed-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251013200944.2499947-48-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 8c12db455eef..0f1a06b62e03 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -629,7 +629,8 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.has_late_bind = desc->has_late_bind; xe->info.has_llc = desc->has_llc; xe->info.has_pxp = desc->has_pxp; - xe->info.has_sriov = desc->has_sriov; + xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) && + desc->has_sriov; xe->info.skip_guc_pc = desc->skip_guc_pc; xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; -- cgit v1.2.3 From 52e59f7740ba23bbb664914967df9a00208ca10c Mon Sep 17 00:00:00 2001 From: Pranjal Ramajor Asha Kanojiya Date: Tue, 7 Oct 2025 08:18:37 +0200 Subject: accel/qaic: Synchronize access to DBC request queue head & tail pointer Two threads of the same process can potentially read and write the head and tail pointers of the same DBC request queue in parallel. This could lead to a race condition and corrupt the DBC request queue.
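In outline, the fix brackets the read-modify-write of the hardware head/tail pointers with a per-DBC mutex (condensed from the diff below; the error paths are as in the patch):

	ret = mutex_lock_interruptible(&dbc->req_lock);
	if (ret)
		goto release_ch_rcu;

	head = readl(dbc->dbc_base + REQHP_OFF);
	tail = readl(dbc->dbc_base + REQTP_OFF);
	/* ... build and queue the request list, advancing 'tail' ... */
	writel(tail, dbc->dbc_base + REQTP_OFF);
	mutex_unlock(&dbc->req_lock);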
Fixes: ff13be830333 ("accel/qaic: Add datapath") Signed-off-by: Pranjal Ramajor Asha Kanojiya Signed-off-by: Youssef Samir Reviewed-by: Jeff Hugo Reviewed-by: Carl Vanderlip [jhugo: Add fixes tag] Signed-off-by: Jeff Hugo Link: https://lore.kernel.org/r/20251007061837.206132-1-youssef.abdulrahman@oss.qualcomm.com --- drivers/accel/qaic/qaic.h | 2 ++ drivers/accel/qaic/qaic_data.c | 12 ++++++++++-- drivers/accel/qaic/qaic_drv.c | 3 +++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h index c31081e42cee..820d133236dd 100644 --- a/drivers/accel/qaic/qaic.h +++ b/drivers/accel/qaic/qaic.h @@ -97,6 +97,8 @@ struct dma_bridge_chan { * response queue's head and tail pointer of this DBC. */ void __iomem *dbc_base; + /* Synchronizes access to Request queue's head and tail pointer */ + struct mutex req_lock; /* Head of list where each node is a memory handle queued in request queue */ struct list_head xfer_list; /* Synchronizes DBC readers during cleanup */ diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index 797289e9d780..c4f117edb266 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -1356,13 +1356,17 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr goto release_ch_rcu; } + ret = mutex_lock_interruptible(&dbc->req_lock); + if (ret) + goto release_ch_rcu; + head = readl(dbc->dbc_base + REQHP_OFF); tail = readl(dbc->dbc_base + REQTP_OFF); if (head == U32_MAX || tail == U32_MAX) { /* PCI link error */ ret = -ENODEV; - goto release_ch_rcu; + goto unlock_req_lock; } queue_level = head <= tail ? tail - head : dbc->nelem - (head - tail); @@ -1370,11 +1374,12 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr ret = send_bo_list_to_device(qdev, file_priv, exec, args->hdr.count, is_partial, dbc, head, &tail); if (ret) - goto release_ch_rcu; + goto unlock_req_lock; /* Finalize commit to hardware */ submit_ts = ktime_get_ns(); writel(tail, dbc->dbc_base + REQTP_OFF); + mutex_unlock(&dbc->req_lock); update_profiling_data(file_priv, exec, args->hdr.count, is_partial, received_ts, submit_ts, queue_level); @@ -1382,6 +1387,9 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr if (datapath_polling) schedule_work(&dbc->poll_work); +unlock_req_lock: + if (ret) + mutex_unlock(&dbc->req_lock); release_ch_rcu: srcu_read_unlock(&dbc->ch_lock, rcu_id); unlock_dev_srcu: diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index e31bcb0ecfc9..e162f4b8a262 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -454,6 +454,9 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, return NULL; init_waitqueue_head(&qdev->dbc[i].dbc_release); INIT_LIST_HEAD(&qdev->dbc[i].bo_lists); + ret = drmm_mutex_init(drm, &qdev->dbc[i].req_lock); + if (ret) + return NULL; } return qdev; -- cgit v1.2.3 From 7e091add9c433bab6912228799bf508e2414acc3 Mon Sep 17 00:00:00 2001 From: Martin George Date: Mon, 8 Sep 2025 22:54:57 +0530 Subject: nvme-auth: update sc_c in host response The sc_c field is currently not updated in the host response to the controller challenge, leading to failures while attempting secure channel concatenation. Fix this by adding a new sc_c variable to the dhchap queue context structure, which is appropriately set during negotiate and then used in the host response.
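Condensed from the diff below, the flow is: stash the sc_c byte seen at negotiate time, then feed that stored byte into the host-response hash where a hardcoded zero byte was used before:

	/* negotiate: remember the transmitted sc_c */
	chap->sc_c = data->sc_c;

	/* host response: hash the stored sc_c instead of a zero byte */
	*buf = chap->sc_c;
	ret = crypto_shash_update(shash, buf, 1);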
Fixes: e88a7595b57f ("nvme-tcp: request secure channel concatenation") Signed-off-by: Martin George Signed-off-by: Prashanth Adurthi Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 012fcfc79a73..a01178caf15b 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -36,6 +36,7 @@ struct nvme_dhchap_queue_context { u8 status; u8 dhgroup_id; u8 hash_id; + u8 sc_c; size_t hash_len; u8 c1[64]; u8 c2[64]; @@ -154,6 +155,8 @@ static int nvme_auth_set_dhchap_negotiate_data(struct nvme_ctrl *ctrl, data->auth_protocol[0].dhchap.idlist[34] = NVME_AUTH_DHGROUP_6144; data->auth_protocol[0].dhchap.idlist[35] = NVME_AUTH_DHGROUP_8192; + chap->sc_c = data->sc_c; + return size; } @@ -489,7 +492,7 @@ static int nvme_auth_dhchap_setup_host_response(struct nvme_ctrl *ctrl, ret = crypto_shash_update(shash, buf, 2); if (ret) goto out; - memset(buf, 0, sizeof(buf)); + *buf = chap->sc_c; ret = crypto_shash_update(shash, buf, 1); if (ret) goto out; @@ -500,6 +503,7 @@ static int nvme_auth_dhchap_setup_host_response(struct nvme_ctrl *ctrl, strlen(ctrl->opts->host->nqn)); if (ret) goto out; + memset(buf, 0, sizeof(buf)); ret = crypto_shash_update(shash, buf, 1); if (ret) goto out; -- cgit v1.2.3 From df90f6cd29d8c77be6de4f9adf9cbe42ce2f0016 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 14 Oct 2025 10:40:57 +0200 Subject: slab: fix clearing freelist in free_deferred_objects() defer_free() links pending objects using the slab's freelist offset which is fine as they are not free yet. free_deferred_objects() then clears this pointer to avoid confusing the debugging consistency checks that may be enabled for the cache. However, with CONFIG_SLAB_FREELIST_HARDENED, even the NULL pointer needs to be encoded appropriately using set_freepointer(), otherwise it's decoded as something else and triggers the consistency checks, as found by the kernel test robot. Use set_freepointer() to prevent the issue. Fixes: af92793e52c3 ("slab: Introduce kmalloc_nolock() and kfree_nolock().") Reported-and-tested-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202510101652.7921fdc6-lkp@intel.com Acked-by: Alexei Starovoitov Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- mm/slub.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index b1f15598fbfd..13ae4491136a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6443,15 +6443,16 @@ static void free_deferred_objects(struct irq_work *work) slab = virt_to_slab(x); s = slab->slab_cache; + /* Point 'x' back to the beginning of allocated object */ + x -= s->offset; + /* * We used freepointer in 'x' to link 'x' into df->objects. * Clear it to NULL to avoid false positive detection * of "Freepointer corruption". */ - *(void **)x = NULL; + set_freepointer(s, x, NULL); - /* Point 'x' back to the beginning of allocated object */ - x -= s->offset; __slab_free(s, slab, x, x, 1, _THIS_IP_); } -- cgit v1.2.3 From 7f9ee5fc97e14682e36fe22ae2654c07e4998b82 Mon Sep 17 00:00:00 2001 From: Shardul Bankar Date: Tue, 14 Oct 2025 17:30:37 +0530 Subject: bpf: test_run: Fix ctx leak in bpf_prog_test_run_xdp error path Fix a memory leak in bpf_prog_test_run_xdp() where the context buffer allocated by bpf_ctx_init() is not freed when the function returns early due to a data size check. 
On the failing path: ctx = bpf_ctx_init(...); if (kattr->test.data_size_in - meta_sz < ETH_HLEN) return -EINVAL; The early return bypasses the cleanup label that kfree()s ctx, leading to a leak detectable by kmemleak under fuzzing. Change the return to jump to the existing free_ctx label. Fixes: fe9544ed1a2e ("bpf: Support specifying linear xdp packet data size for BPF_PROG_TEST_RUN") Reported-by: BPF Runtime Fuzzer (BRF) Signed-off-by: Shardul Bankar Signed-off-by: Martin KaFai Lau Acked-by: Jiri Olsa Acked-by: Daniel Borkmann Link: https://patch.msgid.link/20251014120037.1981316-1-shardulsb08@gmail.com --- net/bpf/test_run.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index dfb03ee0bb62..1782e83de2cb 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -1269,7 +1269,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, goto free_ctx; if (kattr->test.data_size_in - meta_sz < ETH_HLEN) - return -EINVAL; + goto free_ctx; data = bpf_test_init(kattr, linear_sz, max_linear_sz, headroom, tailroom); if (IS_ERR(data)) { -- cgit v1.2.3 From 2e41e5a91a37202ff6743c3ae5329e106aeb1c6c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Oct 2025 13:57:53 -0700 Subject: cxl/acpi: Fix setup of memory resource in cxl_acpi_set_cache_size() In order to compare the resource against the HMAT memory target, the resource needs to be of memory type. Change the DEFINE_RES() macro to DEFINE_RES_MEM() in order to set the correct resource type. hmat_get_extended_linear_cache_size() uses resource_contains() internally. This causes a regression for platforms with the extended linear cache enabled as the comparison always fails and the cache size is not set. User visible impact is that when 'cxl list' is issued, a CXL region with extended linear cache support will report only half of its actual size. This also breaks MCE reporting of the memory region due to incorrect offset calculation for the memory. [dj: Fixup commit log suggested by djbw] [dj: Fixup stable address for cc] Fixes: 12b3d697c812 ("cxl: Remove core/acpi.c and cxl core dependency on ACPI") Cc: stable@vger.kernel.org Reviewed-by: Gregory Price Reviewed-by: Alison Schofield Reviewed-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index d7a5539d07d4..bd2e282ca93a 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -348,7 +348,7 @@ static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd) struct resource res; int nid, rc; - res = DEFINE_RES(start, size, 0); + res = DEFINE_RES_MEM(start, size); nid = phys_to_target_node(start); rc = hmat_get_extended_linear_cache_size(&res, nid, &cache_size); -- cgit v1.2.3 From 0f6f1982cb28abf1b8a3a8ba906e2c6ade6a70e8 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Oct 2025 13:57:54 -0700 Subject: cxl: Set range param for region_res_match_cxl_range() as const The function takes two parameters and compares them. The second parameter should be const since no modification should be done to it.
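The resulting prototype (from the diff below):

	static bool region_res_match_cxl_range(const struct cxl_region_params *p,
					       const struct range *range);

Among other things, this lets callers holding only a const range pointer, such as the match_region_by_range() fix that follows, call the helper directly.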
Reviewed-by: Gregory Price Reviewed-by: Alison Schofield Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e14c1d305b22..858d4678628d 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -839,7 +839,7 @@ static int match_free_decoder(struct device *dev, const void *data) } static bool region_res_match_cxl_range(const struct cxl_region_params *p, - struct range *range) + const struct range *range) { if (!p->res) return false; -- cgit v1.2.3 From f4d027921c811ff7fc16e4d03c6bbbf4347cf37a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Oct 2025 13:57:55 -0700 Subject: cxl: Fix match_region_by_range() to use region_res_match_cxl_range() match_region_by_range() is not using the helper function that also takes extended linear cache size into account when comparing regions. This causes an x2 region to show up as 2 partial incomplete regions rather than a single CXL region with extended linear cache support. Replace the open coded compare logic with the proper helper function for comparison. User visible impact is that when 'cxl list' is issued, no active CXL region(s) are shown. There may be multiple idle regions present. No actual active CXL region is present in the kernel. [dj: Fix stable address] Fixes: 0ec9849b6333 ("acpi/hmat / cxl: Add extended linear cache support for CXL") Cc: stable@vger.kernel.org Reviewed-by: Gregory Price Reviewed-by: Alison Schofield Reviewed-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 858d4678628d..57ed85e332d3 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3398,10 +3398,7 @@ static int match_region_by_range(struct device *dev, const void *data) p = &cxlr->params; guard(rwsem_read)(&cxl_rwsem.region); - if (p->res && p->res->start == r->start && p->res->end == r->end) - return 1; - - return 0; + return region_res_match_cxl_range(p, r); } static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, -- cgit v1.2.3 From 257c4b03a2f7d8c15f79c79b09a561af9734f6c4 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 14 Oct 2025 00:31:04 -0700 Subject: cxl/region: Use %pa printk format to emit resource_size_t KASAN reports a stack-out-of-bounds access in validate_region_offset() while running the cxl-poison.sh unit test because the printk format specifier %pr is not a match for the resource_size_t type of the variables. %pr expects struct resource pointers and attempts to dereference the structure fields, reading beyond the bounds of the stack variables. Since these messages emit an 'A exceeds B' type of message, keep the resource_size_t's and use the %pa specifier to be architecture safe. BUG: KASAN: stack-out-of-bounds in resource_string.isra.0+0xe9a/0x1690 [] Read of size 8 at addr ffff88800a7afb40 by task bash/1397 ...
[] The buggy address belongs to stack of task bash/1397 [] and is located at offset 56 in frame: [] validate_region_offset+0x0/0x1c0 [cxl_core] Fixes: c3dd67681c70 ("cxl/region: Add inject and clear poison by region offset") Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 57ed85e332d3..b06fee1978ba 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3663,14 +3663,14 @@ static int validate_region_offset(struct cxl_region *cxlr, u64 offset) if (offset < p->cache_size) { dev_err(&cxlr->dev, - "Offset %#llx is within extended linear cache %pr\n", + "Offset %#llx is within extended linear cache %pa\n", offset, &p->cache_size); return -EINVAL; } region_size = resource_size(p->res); if (offset >= region_size) { - dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pr\n", + dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pa\n", offset, &region_size); return -EINVAL; } -- cgit v1.2.3 From 469276c06affdfd2d9e88c9f228bb81119ec1a20 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 14 Oct 2025 19:36:02 +0300 Subject: PCI: Revert early bridge resource set up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit a43ac325c7cb ("PCI: Set up bridge resources earlier") moved bridge window resource setup earlier than before. The change was necessary to support another change that got pulled at the last minute due to breaking s390 and other systems. The presence of valid bridge window resources earlier than before allows the pci_assign_unassigned_root_bus_resources() call from pci_host_probe() to assign the bridge windows. Some host bridges, however, have to wait first for the link up event before they can enumerate successfully (see e.g. qcom_pcie_global_irq_thread()) and thus the bus has not been enumerated yet while calling pci_host_probe(). Calling pci_assign_unassigned_root_bus_resources() without results from enumeration can result in bridge windows being sized too small, which cannot be corrected later after the enumeration has completed because bridge windows have become pinned in place by the other resources. Interestingly, it seems pci_read_bridge_bases() is not called at all in the problematic case and the bridge window resource type setup is done by pci_bridge_check_ranges() and sizing by the usual resource fitting logic. The root problem behind all this looks pretty generic. If resource fitting is called too early, the hotplug reservation and old size lower bounding cause the bridge windows to be assigned without children but with non-zero size, which leads to these pinning problems. As such, this can likely be solved on the general level but the solution does not look trivial. As the commit a43ac325c7cb ("PCI: Set up bridge resources earlier") was a prerequisite for another change that has not ended up in the kernel yet, revert it to resolve the resource assignment failures and give time to code and test a generic solution.
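In miniature, the revert restores the pattern of reading bridge windows into a throwaway struct resource during early probing (call shapes taken from the diff below; this is a sketch, not the kernel source), so nothing is populated in the bridge's resource array before enumeration completes:

	struct resource res;	/* scratch; discarded after probing */

	pci_read_bridge_io(bridge, &res, true);
	pci_read_bridge_mmio(bridge, &res, true);
	pci_read_bridge_mmio_pref(bridge, &res, true);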
Fixes: a43ac325c7cb ("PCI: Set up bridge resources earlier") Reported-by: Val Packett Link: https://lore.kernel.org/r/017ff8df-511c-4da8-b3cf-edf2cb7f1a67@packett.cool Reported-by: Guenter Roeck Link: https://lore.kernel.org/r/df266709-a9b3-4fd8-af3a-c22eb3c9523a@roeck-us.net Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20251014163602.17138-1-ilpo.jarvinen@linux.intel.com --- drivers/pci/probe.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c83e75a0ec12..0ce98e18b5a8 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -538,14 +538,10 @@ static void pci_read_bridge_windows(struct pci_dev *bridge) } if (io) { bridge->io_window = 1; - pci_read_bridge_io(bridge, - pci_resource_n(bridge, PCI_BRIDGE_IO_WINDOW), - true); + pci_read_bridge_io(bridge, &res, true); } - pci_read_bridge_mmio(bridge, - pci_resource_n(bridge, PCI_BRIDGE_MEM_WINDOW), - true); + pci_read_bridge_mmio(bridge, &res, true); /* * DECchip 21050 pass 2 errata: the bridge may miss an address @@ -583,10 +579,7 @@ static void pci_read_bridge_windows(struct pci_dev *bridge) bridge->pref_64_window = 1; } - pci_read_bridge_mmio_pref(bridge, - pci_resource_n(bridge, - PCI_BRIDGE_PREF_MEM_WINDOW), - true); + pci_read_bridge_mmio_pref(bridge, &res, true); } void pci_read_bridge_bases(struct pci_bus *child) -- cgit v1.2.3 From df5a1f4aeb6ff5e7c5ac47d16a347f03509dd441 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 13 Oct 2025 20:03:47 +0200 Subject: MAINTAINERS: add myself as maintainer for b53 I wrote the original OpenWrt driver that Florian used as the base for the dsa driver, I might as well take responsibility for it. Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20251013180347.133246-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3a27901781c2..8a213950e37e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4804,6 +4804,7 @@ F: drivers/net/ethernet/broadcom/b44.* BROADCOM B53/SF2 ETHERNET SWITCH DRIVER M: Florian Fainelli +M: Jonas Gorski L: netdev@vger.kernel.org L: openwrt-devel@lists.openwrt.org (subscribers-only) S: Supported -- cgit v1.2.3 From a4bbb493a3247ef32f6191fd8b2a0657139f8e08 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 14 Oct 2025 14:38:49 -0700 Subject: cxl/trace: Subtract to find an hpa_alias0 in cxl_poison events Traces of cxl_poison events include an hpa_alias0 field if the poison address is in a region configured with an ELC, Extended Linear Cache. Since the ELC always comes first in the region, the calculation needs to subtract the ELC size from the calculated HPA address. 
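A hypothetical sketch of the alias arithmetic (helper name invented here): because the extended linear cache occupies the first half of the region, the DRAM alias of an HPA sits one cache-size below it, hence the subtraction in the fix below.

#include <stdint.h>

#define INVALID_HPA UINT64_MAX

static uint64_t hpa_alias0(uint64_t hpa, uint64_t cache_size)
{
	if (hpa == INVALID_HPA || cache_size == 0)
		return INVALID_HPA;	/* no alias to report */
	return hpa - cache_size;	/* subtract, never add */
}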
Fixes: 8c520c5f1e76 ("cxl: Add extended linear cache address alias emission for cxl events") Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Signed-off-by: Dave Jiang --- drivers/cxl/core/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index a53ec4798b12..a972e4ef1936 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -1068,7 +1068,7 @@ TRACE_EVENT(cxl_poison, __entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd, __entry->dpa); if (__entry->hpa != ULLONG_MAX && cxlr->params.cache_size) - __entry->hpa_alias0 = __entry->hpa + + __entry->hpa_alias0 = __entry->hpa - cxlr->params.cache_size; else __entry->hpa_alias0 = ULLONG_MAX; -- cgit v1.2.3 From e603a342cf7ecd64ef8f36207dfe1caacb9e2583 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 14 Oct 2025 13:20:37 -0700 Subject: selftests/bpf: make arg_parsing.c more robust to crashes We started getting a crash in BPF CI, which seems to originate from the test_parse_test_list_file() test and is happening at this line: ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name"); One way we can crash there is if set.cnt is zero, which is checked for with ASSERT_EQ() above, but we proceed after this regardless of the outcome. Instead of crashing, we should bail out with test failure early. Similarly, if parse_test_list_file() fails, we shouldn't be even looking at set, so bail even earlier if ASSERT_OK() fails. Fixes: 64276f01dce8 ("selftests/bpf: Test_progs can read test lists from file") Signed-off-by: Andrii Nakryiko Tested-by: Ihor Solodrai Link: https://lore.kernel.org/r/20251014202037.72922-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/arg_parsing.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c index bb143de68875..fbf0d9c2f58b 100644 --- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c +++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c @@ -146,9 +146,12 @@ static void test_parse_test_list_file(void) init_test_filter_set(&set); - ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"); + if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file")) + goto out_fclose; + + if (!ASSERT_EQ(set.cnt, 4, "test count")) + goto out_free_set; - ASSERT_EQ(set.cnt, 4, "test count"); ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name"); ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count"); ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name"); @@ -158,8 +161,8 @@ static void test_parse_test_list_file(void) ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name"); ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name"); +out_free_set: free_test_filter_set(&set); - out_fclose: fclose(fp); out_remove: -- cgit v1.2.3 From 7f0fddd817ba6daebea1445ae9fab4b6d2294fa8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 Oct 2025 20:50:52 +0200 Subject: net: core: fix lockdep splat on device unregister Since the blamed commit, unregister_netdevice_many_notify() takes the netdev mutex if the device needs it. If the device list is too long, this will lock more device mutexes than lockdep can handle: unshare -n \ bash -c 'for i in $(seq 1 100);do ip link add foo$i type dummy;done' BUG: MAX_LOCK_DEPTH too low! turning off the locking correctness validator. depth: 48 max: 48!
48 locks held by kworker/u16:1/69: #0: ..148 ((wq_completion)netns){+.+.}-{0:0}, at: process_one_work #1: ..d40 (net_cleanup_work){+.+.}-{0:0}, at: process_one_work #2: ..bd0 (pernet_ops_rwsem){++++}-{4:4}, at: cleanup_net #3: ..aa8 (rtnl_mutex){+.+.}-{4:4}, at: default_device_exit_batch #4: ..cb0 (&dev_instance_lock_key#3){+.+.}-{4:4}, at: unregister_netdevice_many_notify [..] Add a helper to close and then unlock a list of net_devices. Devices that are not up have to be skipped - netif_close_many always removes them from the list without any other actions taken, so they'd remain in locked state. Close devices whenever we've used up half of the tracking slots or we processed entire list without hitting the limit. Fixes: 7e4d784f5810 ("net: hold netdev instance lock during rtnetlink operations") Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20251013185052.14021-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/core/dev.c | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index a64cef2c537e..2acfa44927da 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -12176,6 +12176,35 @@ static void dev_memory_provider_uninstall(struct net_device *dev) } } +/* devices must be UP and netdev_lock()'d */ +static void netif_close_many_and_unlock(struct list_head *close_head) +{ + struct net_device *dev, *tmp; + + netif_close_many(close_head, false); + + /* ... now unlock them */ + list_for_each_entry_safe(dev, tmp, close_head, close_list) { + netdev_unlock(dev); + list_del_init(&dev->close_list); + } +} + +static void netif_close_many_and_unlock_cond(struct list_head *close_head) +{ +#ifdef CONFIG_LOCKDEP + /* We can only track up to MAX_LOCK_DEPTH locks per task. + * + * Reserve half the available slots for additional locks possibly + * taken by notifiers and (soft)irqs. + */ + unsigned int limit = MAX_LOCK_DEPTH / 2; + + if (lockdep_depth(current) > limit) + netif_close_many_and_unlock(close_head); +#endif +} + void unregister_netdevice_many_notify(struct list_head *head, u32 portid, const struct nlmsghdr *nlh) { @@ -12208,17 +12237,18 @@ void unregister_netdevice_many_notify(struct list_head *head, /* If device is running, close it first. Start with ops locked... */ list_for_each_entry(dev, head, unreg_list) { + if (!(dev->flags & IFF_UP)) + continue; if (netdev_need_ops_lock(dev)) { list_add_tail(&dev->close_list, &close_head); netdev_lock(dev); } + netif_close_many_and_unlock_cond(&close_head); } - netif_close_many(&close_head, true); - /* ... now unlock them and go over the rest. */ + netif_close_many_and_unlock(&close_head); + /* ... now go over the rest. */ list_for_each_entry(dev, head, unreg_list) { - if (netdev_need_ops_lock(dev)) - netdev_unlock(dev); - else + if (!netdev_need_ops_lock(dev)) list_add_tail(&dev->close_list, &close_head); } netif_close_many(&close_head, true); -- cgit v1.2.3 From 82ebecdc74ff555daf70b811d854b1f32a296bea Mon Sep 17 00:00:00 2001 From: Jaehun Gou Date: Tue, 14 Oct 2025 22:01:46 +0900 Subject: exfat: fix improper check of dentry.stream.valid_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We found an infinite loop bug in the exFAT file system that can lead to a Denial-of-Service (DoS) condition. When a dentry in an exFAT filesystem is malformed, the following system calls — SYS_openat, SYS_ftruncate, and SYS_pwrite64 — can cause the kernel to hang. 
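To see how a bogus on-disk size can hang the kernel, consider this minimal model (illustrative only, not the exfat code): a negative signed 64-bit size, used unchecked as an unsigned quantity, yields an astronomically large cluster count for subsequent loops.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t valid_size = -1;	/* corrupt dentry field, read as s64 */
	/* Reinterpreted as unsigned and scaled to 4 KiB clusters: */
	uint64_t clusters = (uint64_t)valid_size / 4096;

	printf("%llu clusters to walk\n", (unsigned long long)clusters);
	return 0;	/* prints ~4.5e15, effectively an infinite loop bound */
}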
Root cause analysis shows that the size validation code in exfat_find() does not check whether dentry.stream.valid_size is negative. As a result, the system calls mentioned above can succeed and eventually trigger the DoS issue. This patch adds a check for negative dentry.stream.valid_size to prevent this vulnerability. Co-developed-by: Seunghun Han Signed-off-by: Seunghun Han Co-developed-by: Jihoon Kwon Signed-off-by: Jihoon Kwon Signed-off-by: Jaehun Gou Signed-off-by: Namjae Jeon --- fs/exfat/namei.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 7eb9c67fd35f..2364b49f050a 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -642,10 +642,14 @@ static int exfat_find(struct inode *dir, const struct qstr *qname, info->type = exfat_get_entry_type(ep); info->attr = le16_to_cpu(ep->dentry.file.attr); - info->size = le64_to_cpu(ep2->dentry.stream.valid_size); info->valid_size = le64_to_cpu(ep2->dentry.stream.valid_size); info->size = le64_to_cpu(ep2->dentry.stream.size); + if (info->valid_size < 0) { + exfat_fs_error(sb, "data valid size is invalid(%lld)", info->valid_size); + return -EIO; + } + if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) { exfat_fs_error(sb, "data size is invalid(%lld)", info->size); return -EIO; -- cgit v1.2.3 From 6f719373b943a955fee6fc2012aed207b65e2854 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 14 Oct 2025 10:46:34 +0200 Subject: drm/ast: Blank with VGACR17 sync enable, always clear VGACRB6 sync off Blank the display by disabling sync pulses with VGACR17<7>. Unblank by reenabling them. This VGA setting should be supported by all Aspeed hardware. Ast currently blanks via sync-off bits in VGACRB6. Not all BMCs handle VGACRB6 correctly. After disabling sync during a reboot, some BMCs do not reenable it after the soft reset. The display output remains dark. When the display is off during boot, some BMCs set the sync-off bits in VGACRB6, so the display remains dark. Observed with Blackbird AST2500 BMCs. Clearing the sync-off bits unconditionally fixes these issues. Also do not modify VGASR1's SD bit for blanking, as it only disables GPU access to video memory. 
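For readers unfamiliar with indexed VGA registers, here is a simplified model of the ast_set_index_reg_mask() pattern used below (legacy CRTC ports 0x3d4/0x3d5 and the I/O accessors are assumptions; the driver goes through its own helpers): the mask names the bits to preserve, everything else is overwritten.

#include <stdint.h>

static uint8_t rd(uint16_t port);		/* platform I/O, assumed */
static void wr(uint16_t port, uint8_t val);

static void vga_crtc_rmw(uint8_t index, uint8_t preserve, uint8_t set)
{
	uint8_t v;

	wr(0x3d4, index);		/* select CRTC register 'index' */
	v = rd(0x3d5);			/* read current value */
	v = (v & preserve) | set;	/* e.g. 0x7f keeps bits 6:0 */
	wr(0x3d4, index);
	wr(0x3d5, v);			/* write back */
}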
v2: - init vgacrb6 correctly (Jocelyn) Signed-off-by: Thomas Zimmermann Fixes: ce3d99c83495 ("drm: Call drm_atomic_helper_shutdown() at shutdown time for misc drivers") Tested-by: Nick Bowler Reported-by: Nick Bowler Closes: https://lore.kernel.org/dri-devel/wpwd7rit6t4mnu6kdqbtsnk5bhftgslio6e2jgkz6kgw6cuvvr@xbfswsczfqsi/ Cc: Douglas Anderson Cc: Dave Airlie Cc: Thomas Zimmermann Cc: Jocelyn Falempe Cc: dri-devel@lists.freedesktop.org Cc: # v6.7+ Reviewed-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20251014084743.18242-1-tzimmermann@suse.de --- drivers/gpu/drm/ast/ast_mode.c | 18 ++++++++++-------- drivers/gpu/drm/ast/ast_reg.h | 1 + 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index b4e8edc7c767..30b011ed0a05 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -836,22 +836,24 @@ ast_crtc_helper_atomic_flush(struct drm_crtc *crtc, static void ast_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct ast_device *ast = to_ast_device(crtc->dev); + u8 vgacr17 = 0x00; + u8 vgacrb6 = 0xff; - ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xfc, 0x00); - ast_set_index_reg_mask(ast, AST_IO_VGASRI, 0x01, 0xdf, 0x00); + vgacr17 |= AST_IO_VGACR17_SYNC_ENABLE; + vgacrb6 &= ~(AST_IO_VGACRB6_VSYNC_OFF | AST_IO_VGACRB6_HSYNC_OFF); + + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0x17, 0x7f, vgacr17); + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xfc, vgacrb6); } static void ast_crtc_helper_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_crtc_state *old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); struct ast_device *ast = to_ast_device(crtc->dev); - u8 vgacrb6; + u8 vgacr17 = 0xff; - ast_set_index_reg_mask(ast, AST_IO_VGASRI, 0x01, 0xdf, AST_IO_VGASR1_SD); - - vgacrb6 = AST_IO_VGACRB6_VSYNC_OFF | - AST_IO_VGACRB6_HSYNC_OFF; - ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xfc, vgacrb6); + vgacr17 &= ~AST_IO_VGACR17_SYNC_ENABLE; + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0x17, 0x7f, vgacr17); /* * HW cursors require the underlying primary plane and CRTC to diff --git a/drivers/gpu/drm/ast/ast_reg.h b/drivers/gpu/drm/ast/ast_reg.h index e15adaf3a80e..30578e3b07e4 100644 --- a/drivers/gpu/drm/ast/ast_reg.h +++ b/drivers/gpu/drm/ast/ast_reg.h @@ -29,6 +29,7 @@ #define AST_IO_VGAGRI (0x4E) #define AST_IO_VGACRI (0x54) +#define AST_IO_VGACR17_SYNC_ENABLE BIT(7) /* called "Hardware reset" in docs */ #define AST_IO_VGACR80_PASSWORD (0xa8) #define AST_IO_VGACR99_VGAMEM_RSRV_MASK GENMASK(1, 0) #define AST_IO_VGACRA1_VGAIO_DISABLED BIT(1) -- cgit v1.2.3 From 28412b489b088fb88dff488305fd4e56bd47f6e4 Mon Sep 17 00:00:00 2001 From: Jiaming Zhang Date: Wed, 15 Oct 2025 13:16:45 +0800 Subject: ALSA: usb-audio: Fix NULL pointer dereference in try_to_register_card In try_to_register_card(), the return value of usb_ifnum_to_if() is passed directly to usb_interface_claimed() without a NULL check, which will lead to a NULL pointer dereference when creating an invalid USB audio device. Fix this by adding a check to ensure the interface pointer is valid before passing it to usb_interface_claimed().
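The shape of the bug and fix, as a self-contained C sketch (hypothetical names): a lookup that can return NULL must be bound and tested before any helper dereferences it, and && short-circuiting keeps the dereference guarded.

#include <stdbool.h>
#include <stddef.h>

struct entry { bool claimed; };

static const struct entry *lookup(const struct entry *tbl, size_t n, size_t i)
{
	return i < n ? &tbl[i] : NULL;	/* may fail, like usb_ifnum_to_if() */
}

static bool is_claimed(const struct entry *e)
{
	return e->claimed;		/* unconditional dereference */
}

static bool check(const struct entry *tbl, size_t n, size_t i)
{
	const struct entry *e = lookup(tbl, n, i);

	return e && is_claimed(e);	/* NULL never reaches the deref */
}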
Fixes: 39efc9c8a973 ("ALSA: usb-audio: Fix last interface check for registration") Closes: https://lore.kernel.org/all/CANypQFYtQxHL5ghREs-BujZG413RPJGnO5TH=xjFBKpPts33tA@mail.gmail.com/ Signed-off-by: Jiaming Zhang Signed-off-by: Takashi Iwai --- sound/usb/card.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 1d5a65eac933..270dad84d825 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -891,10 +891,16 @@ get_alias_quirk(struct usb_device *dev, unsigned int id) */ static int try_to_register_card(struct snd_usb_audio *chip, int ifnum) { + struct usb_interface *iface; + if (check_delayed_register_option(chip) == ifnum || - chip->last_iface == ifnum || - usb_interface_claimed(usb_ifnum_to_if(chip->dev, chip->last_iface))) + chip->last_iface == ifnum) + return snd_card_register(chip->card); + + iface = usb_ifnum_to_if(chip->dev, chip->last_iface); + if (iface && usb_interface_claimed(iface)) return snd_card_register(chip->card); + return 0; } -- cgit v1.2.3 From 2d8636119b92970ba135c3c4da87d24dbfdeb8ca Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Wed, 15 Oct 2025 16:34:54 +0900 Subject: exfat: fix out-of-bounds in exfat_nls_to_ucs2() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the len argument passed from exfat_ioctl_set_volume_label() to exfat_nls_to_utf16() is 1 too large, an out-of-bounds read occurs when dereferencing p_cstring in exfat_nls_to_ucs2() later. And because of the NLS_NAME_OVERLEN macro, another error occurs when creating a file with a period at the end using utf8 and other iocharsets. To avoid this, remove the code that uses the NLS_NAME_OVERLEN macro and make the len argument the length of the label string, capped at FSLABEL_MAX - 1.
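The capping idea in isolation (FSLABEL_MAX value taken from linux/fs.h; this is a standalone sketch): derive the conversion length from the actual NUL-terminated label, never from the buffer size, and leave room for the terminator.

#include <string.h>

#define FSLABEL_MAX 256	/* as in linux/fs.h */

static size_t label_len(const char label[FSLABEL_MAX])
{
	/* strnlen never scans past the cap, so a missing NUL is safe too */
	return strnlen(label, FSLABEL_MAX - 1);
}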
Reported-by: syzbot+98cc76a76de46b3714d4@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=98cc76a76de46b3714d4 Fixes: d01579d590f7 ("exfat: Add support for FS_IOC_{GET,SET}FSLABEL") Suggested-by: Pali Rohár Signed-off-by: Jeongjun Park Signed-off-by: Namjae Jeon --- fs/exfat/exfat_fs.h | 1 - fs/exfat/file.c | 7 ++++--- fs/exfat/namei.c | 2 +- fs/exfat/nls.c | 3 --- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 329697c89d09..38210fb6901c 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -29,7 +29,6 @@ enum exfat_error_mode { enum { NLS_NAME_NO_LOSSY = 0, /* no lossy */ NLS_NAME_LOSSY = 1 << 0, /* just detected incorrect filename(s) */ - NLS_NAME_OVERLEN = 1 << 1, /* the length is over than its limit */ }; #define EXFAT_HASH_BITS 8 diff --git a/fs/exfat/file.c b/fs/exfat/file.c index f246cf439588..adc37b4d7fc2 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -509,8 +509,8 @@ static int exfat_ioctl_get_volume_label(struct super_block *sb, unsigned long ar static int exfat_ioctl_set_volume_label(struct super_block *sb, unsigned long arg) { - int ret = 0, lossy; - char label[FSLABEL_MAX]; + int ret = 0, lossy, label_len; + char label[FSLABEL_MAX] = {0}; struct exfat_uni_name uniname; if (!capable(CAP_SYS_ADMIN)) @@ -520,8 +520,9 @@ static int exfat_ioctl_set_volume_label(struct super_block *sb, return -EFAULT; memset(&uniname, 0, sizeof(uniname)); + label_len = strnlen(label, FSLABEL_MAX - 1); if (label[0]) { - ret = exfat_nls_to_utf16(sb, label, FSLABEL_MAX, + ret = exfat_nls_to_utf16(sb, label, label_len, &uniname, &lossy); if (ret < 0) return ret; diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 2364b49f050a..745dce29ddb5 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -442,7 +442,7 @@ static int __exfat_resolve_path(struct inode *inode, const unsigned char *path, return namelen; /* return error value */ if ((lossy && !lookup) || !namelen) - return (lossy & NLS_NAME_OVERLEN) ? -ENAMETOOLONG : -EINVAL; + return -EINVAL; return 0; } diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c index 8243d94ceaf4..57db08a5271c 100644 --- a/fs/exfat/nls.c +++ b/fs/exfat/nls.c @@ -616,9 +616,6 @@ static int exfat_nls_to_ucs2(struct super_block *sb, unilen++; } - if (p_cstring[i] != '\0') - lossy |= NLS_NAME_OVERLEN; - *uniname = '\0'; p_uniname->name_len = unilen; p_uniname->name_hash = exfat_calc_chksum16(upname, unilen << 1, 0, -- cgit v1.2.3 From 57b00ab3d33d2b177bf45d568fa1e6203cd099b6 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Tue, 14 Oct 2025 17:21:57 +0800 Subject: ASoC: sdw_utils: add rt1321 part id to codec_info_list The SoundWire machine driver now supports the rt1321 amplifier part ID.
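Illustratively (a trimmed-down table, not the real one), a new part ID that is register-compatible with existing silicon can reuse that part's DAI configuration, which is what the entry in the diff below does for rt1321:

struct codec_entry {
	unsigned int part_id;
	const char *dai_name;	/* DAI advertised to the machine driver */
};

static const struct codec_entry amp_table[] = {
	{ 0x1320, "rt1320-aif1" },
	{ 0x1321, "rt1320-aif1" },	/* rt1321 reuses the rt1320 DAI */
};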
Signed-off-by: Shuming Fan Link: https://patch.msgid.link/20251014092157.2789054-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_utils.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index 56c72ef27e7b..270c66b90228 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -311,6 +311,26 @@ struct asoc_sdw_codec_info codec_info_list[] = { }, .dai_num = 1, }, + { + .part_id = 0x1321, + .dais = { + { + .direction = {true, false}, + .dai_name = "rt1320-aif1", + .component_name = "rt1320", + .dai_type = SOC_SDW_DAI_TYPE_AMP, + .dailink = {SOC_SDW_AMP_OUT_DAI_ID, SOC_SDW_UNUSED_DAI_ID}, + .init = asoc_sdw_rt_amp_init, + .exit = asoc_sdw_rt_amp_exit, + .rtd_init = asoc_sdw_rt_amp_spk_rtd_init, + .controls = generic_spk_controls, + .num_controls = ARRAY_SIZE(generic_spk_controls), + .widgets = generic_spk_widgets, + .num_widgets = ARRAY_SIZE(generic_spk_widgets), + }, + }, + .dai_num = 1, + }, { .part_id = 0x714, .version_id = 3, -- cgit v1.2.3 From d5cda96d0130effd4255f7c5e720a58760a032a4 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 8 Oct 2025 15:58:01 +0100 Subject: ASoC: codecs: wcd938x-sdw: remove redundant runtime pm calls Component bind callbacks already do runtime pm calls; the soundwire codec also tries to do exactly the same thing, resulting in unbalanced pm_runtime_enable and disable calls. Remove the redundant calls from the wcd938x-sdw driver. Reported-by: Konrad Dybcio Fixes: ebaf88c0546d ("ASoC: codecs: wcd-common: move component ops to common") Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20251008145801.3479-1-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x-sdw.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index add907cb2706..8c8f39d04972 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1207,24 +1207,14 @@ static int wcd9380_probe(struct sdw_slave *pdev, regcache_cache_only(wcd->regmap, true); } - pm_runtime_set_autosuspend_delay(dev, 3000); - pm_runtime_use_autosuspend(dev); - pm_runtime_mark_last_busy(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - ret = component_add(dev, &wcd_sdw_component_ops); if (ret) - goto err_disable_rpm; - - return 0; + return ret; -err_disable_rpm: - pm_runtime_disable(dev); + /* Set suspended until aggregate device is bind */ pm_runtime_set_suspended(dev); - pm_runtime_dont_use_autosuspend(dev); - return ret; + return 0; } static int wcd9380_remove(struct sdw_slave *pdev) @@ -1233,10 +1223,6 @@ static int wcd9380_remove(struct sdw_slave *pdev) component_del(dev, &wcd_sdw_component_ops); - pm_runtime_disable(dev); - pm_runtime_set_suspended(dev); - pm_runtime_dont_use_autosuspend(dev); - return 0; } -- cgit v1.2.3 From 5fb750e8a9ae123b2034771b864b8a21dbef65cd Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 14 Oct 2025 17:07:00 -0700 Subject: bpf: Replace bpf_map_kmalloc_node() with kmalloc_nolock() to allocate bpf_async_cb structures. The following kmemleak splat: [ 8.105530] kmemleak: Trying to color unknown object at 0xff11000100e918c0 as Black [ 8.106521] Call Trace: [ 8.106521] [ 8.106521] dump_stack_lvl+0x4b/0x70 [ 8.106521] kvfree_call_rcu+0xcb/0x3b0 [ 8.106521] ?
hrtimer_cancel+0x21/0x40 [ 8.106521] bpf_obj_free_fields+0x193/0x200 [ 8.106521] htab_map_update_elem+0x29c/0x410 [ 8.106521] bpf_prog_cfc8cd0f42c04044_overwrite_cb+0x47/0x4b [ 8.106521] bpf_prog_8c30cd7c4db2e963_overwrite_timer+0x65/0x86 [ 8.106521] bpf_prog_test_run_syscall+0xe1/0x2a0 happens due to the combination of features and fixes, but mainly due to commit 6d78b4473cdb ("bpf: Tell memcg to use allow_spinning=false path in bpf_timer_init()") It's using __GFP_HIGH, which instructs slub/kmemleak internals to skip kmemleak_alloc_recursive() on allocation, so subsequent kfree_rcu()-> kvfree_call_rcu()->kmemleak_ignore() complains with the above splat. To fix this imbalance, replace bpf_map_kmalloc_node() with kmalloc_nolock() and kfree_rcu() with call_rcu() + kfree_nolock() to make sure that the objects allocated with kmalloc_nolock() are freed with kfree_nolock() rather than the implicit kfree() that kfree_rcu() uses internally. Note, the kmalloc_nolock() happens under bpf_spin_lock_irqsave(), so it will always fail in PREEMPT_RT. This is not an issue at the moment, since bpf_timers are disabled in PREEMPT_RT. In the future bpf_spin_lock will be replaced with state machine similar to bpf_task_work. Fixes: 6d78b4473cdb ("bpf: Tell memcg to use allow_spinning=false path in bpf_timer_init()") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Reviewed-by: Shakeel Butt Acked-by: Harry Yoo Acked-by: Vlastimil Babka Cc: linux-mm@kvack.org Link: https://lore.kernel.org/bpf/20251015000700.28988-1-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 4 ++++ kernel/bpf/helpers.c | 25 ++++++++++++++----------- kernel/bpf/syscall.c | 15 +++++++++++++++ 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a98c83346134..d808253f2e94 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2499,6 +2499,8 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid, #ifdef CONFIG_MEMCG void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); +void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, + int node); void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags); void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags); @@ -2511,6 +2513,8 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, */ #define bpf_map_kmalloc_node(_map, _size, _flags, _node) \ kmalloc_node(_size, _flags, _node) +#define bpf_map_kmalloc_nolock(_map, _size, _flags, _node) \ + kmalloc_nolock(_size, _flags, _node) #define bpf_map_kzalloc(_map, _size, _flags) \ kzalloc(_size, _flags) #define bpf_map_kvcalloc(_map, _n, _size, _flags) \ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index c9fab9a356df..8eb117c52817 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1215,13 +1215,20 @@ static void bpf_wq_work(struct work_struct *work) rcu_read_unlock_trace(); } +static void bpf_async_cb_rcu_free(struct rcu_head *rcu) +{ + struct bpf_async_cb *cb = container_of(rcu, struct bpf_async_cb, rcu); + + kfree_nolock(cb); +} + static void bpf_wq_delete_work(struct work_struct *work) { struct bpf_work *w = container_of(work, struct bpf_work, delete_work); cancel_work_sync(&w->work); - kfree_rcu(w, cb.rcu); + call_rcu(&w->cb.rcu, bpf_async_cb_rcu_free); } static void bpf_timer_delete_work(struct work_struct *work) @@ -1230,13 +1237,13 @@ static void bpf_timer_delete_work(struct work_struct *work) /* Cancel the 
timer and wait for callback to complete if it was running. * If hrtimer_cancel() can be safely called it's safe to call - * kfree_rcu(t) right after for both preallocated and non-preallocated + * call_rcu() right after for both preallocated and non-preallocated * maps. The async->cb = NULL was already done and no code path can see * address 't' anymore. Timer if armed for existing bpf_hrtimer before * bpf_timer_cancel_and_free will have been cancelled. */ hrtimer_cancel(&t->timer); - kfree_rcu(t, cb.rcu); + call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free); } static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags, @@ -1270,11 +1277,7 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u goto out; } - /* Allocate via bpf_map_kmalloc_node() for memcg accounting. Until - * kmalloc_nolock() is available, avoid locking issues by using - * __GFP_HIGH (GFP_ATOMIC & ~__GFP_RECLAIM). - */ - cb = bpf_map_kmalloc_node(map, size, __GFP_HIGH, map->numa_node); + cb = bpf_map_kmalloc_nolock(map, size, 0, map->numa_node); if (!cb) { ret = -ENOMEM; goto out; @@ -1315,7 +1318,7 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u * or pinned in bpffs. */ WRITE_ONCE(async->cb, NULL); - kfree(cb); + kfree_nolock(cb); ret = -EPERM; } out: @@ -1580,7 +1583,7 @@ void bpf_timer_cancel_and_free(void *val) * timer _before_ calling us, such that failing to cancel it here will * cause it to possibly use struct hrtimer after freeing bpf_hrtimer. * Therefore, we _need_ to cancel any outstanding timers before we do - * kfree_rcu, even though no more timers can be armed. + * call_rcu, even though no more timers can be armed. * * Moreover, we need to schedule work even if timer does not belong to * the calling callback_fn, as on two different CPUs, we can end up in a @@ -1607,7 +1610,7 @@ void bpf_timer_cancel_and_free(void *val) * completion. */ if (hrtimer_try_to_cancel(&t->timer) >= 0) - kfree_rcu(t, cb.rcu); + call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free); else queue_work(system_dfl_wq, &t->cb.delete_work); } else { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2a9456a3e730..8a129746bd6c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -520,6 +520,21 @@ void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, return ptr; } +void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, + int node) +{ + struct mem_cgroup *memcg, *old_memcg; + void *ptr; + + memcg = bpf_map_get_memcg(map); + old_memcg = set_active_memcg(memcg); + ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node); + set_active_memcg(old_memcg); + mem_cgroup_put(memcg); + + return ptr; +} + void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) { struct mem_cgroup *memcg, *old_memcg; -- cgit v1.2.3 From 6fced056d2cc8d01b326e6fcfabaacb9850b71a4 Mon Sep 17 00:00:00 2001 From: ZhangGuoDong Date: Sun, 12 Oct 2025 00:47:59 +0800 Subject: smb/server: fix possible memory leak in smb2_read() Memory leak occurs when ksmbd_vfs_read() fails. Fix this by adding the missing kvfree(). 
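The general shape of this class of fix, in plain C (hypothetical reader callback): any buffer allocated ahead of a fallible call must be released on that call's failure path, or it leaks.

#include <stdlib.h>

static int read_into_temp(long (*reader)(void *, unsigned long),
			  unsigned long len, void **out)
{
	void *buf = malloc(len);	/* stands in for the kvmalloc'd payload */
	long n;

	if (!buf)
		return -1;

	n = reader(buf, len);
	if (n < 0) {
		free(buf);		/* the analogue of the missing kvfree() */
		return -1;
	}

	*out = buf;			/* success: caller now owns buf */
	return 0;
}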
Co-developed-by: ChenXiaoSong Signed-off-by: ChenXiaoSong Signed-off-by: ZhangGuoDong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index ab1d45fcebde..e81e615f322a 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6824,6 +6824,7 @@ int smb2_read(struct ksmbd_work *work) nbytes = ksmbd_vfs_read(work, fp, length, &offset, aux_payload_buf); if (nbytes < 0) { + kvfree(aux_payload_buf); err = nbytes; goto out; } -- cgit v1.2.3 From 379510a815cb2e64eb0a379cb62295d6ade65df0 Mon Sep 17 00:00:00 2001 From: ZhangGuoDong Date: Sun, 12 Oct 2025 00:51:36 +0800 Subject: smb/server: fix possible refcount leak in smb2_sess_setup() The reference count of ksmbd_session will leak when the session needs to reconnect. Fix this by adding the missing ksmbd_user_session_put(). Co-developed-by: ChenXiaoSong Signed-off-by: ChenXiaoSong Signed-off-by: ZhangGuoDong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index e81e615f322a..b731d9b09408 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1806,6 +1806,7 @@ int smb2_sess_setup(struct ksmbd_work *work) if (ksmbd_conn_need_reconnect(conn)) { rc = -EFAULT; + ksmbd_user_session_put(sess); sess = NULL; goto out_err; } -- cgit v1.2.3 From 88f170814fea74911ceab798a43cbd7c5599bed4 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Wed, 15 Oct 2025 09:25:46 +0200 Subject: ksmbd: fix recursive locking in RPC handle list access Since commit 305853cce3794 ("ksmbd: Fix race condition in RPC handle list access"), ksmbd_session_rpc_method() attempts to lock sess->rpc_lock. This causes hung connections / tasks when a client attempts to open a named pipe. Using Samba's rpcclient tool: $ rpcclient //192.168.1.254 -U user%password $ rpcclient $> srvinfo Kernel side: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/0:0 state:D stack:0 pid:5021 tgid:5021 ppid:2 flags:0x00200000 Workqueue: ksmbd-io handle_ksmbd_work Call trace: __schedule from schedule+0x3c/0x58 schedule from schedule_preempt_disabled+0xc/0x10 schedule_preempt_disabled from rwsem_down_read_slowpath+0x1b0/0x1d8 rwsem_down_read_slowpath from down_read+0x28/0x30 down_read from ksmbd_session_rpc_method+0x18/0x3c ksmbd_session_rpc_method from ksmbd_rpc_open+0x34/0x68 ksmbd_rpc_open from ksmbd_session_rpc_open+0x194/0x228 ksmbd_session_rpc_open from create_smb2_pipe+0x8c/0x2c8 create_smb2_pipe from smb2_open+0x10c/0x27ac smb2_open from handle_ksmbd_work+0x238/0x3dc handle_ksmbd_work from process_scheduled_works+0x160/0x25c process_scheduled_works from worker_thread+0x16c/0x1e8 worker_thread from kthread+0xa8/0xb8 kthread from ret_from_fork+0x14/0x38 Exception stack(0x8529ffb0 to 0x8529fff8) The task deadlocks because the lock is already held: ksmbd_session_rpc_open down_write(&sess->rpc_lock) ksmbd_rpc_open ksmbd_session_rpc_method down_read(&sess->rpc_lock) <-- deadlock Adjust ksmbd_session_rpc_method() callers to take the lock when necessary.
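The conversion pattern, sketched with invented types (not the ksmbd code): instead of taking the rwsem inside the accessor, document the caller's locking duty with lockdep_assert_held(), so a caller that already holds the lock can no longer deadlock against itself.

/* before: self-deadlocks when the caller already holds rpc_lock */
static int rpc_method_locking(struct session *sess, int id)
{
	int m;

	down_read(&sess->rpc_lock);
	m = __rpc_method(sess, id);
	up_read(&sess->rpc_lock);
	return m;
}

/* after: the caller locks; lockdep flags any unlocked use at runtime */
static int rpc_method(struct session *sess, int id)
{
	lockdep_assert_held(&sess->rpc_lock);
	return __rpc_method(sess, id);
}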
Fixes: 305853cce3794 ("ksmbd: Fix race condition in RPC handle list access") Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/mgmt/user_session.c | 7 ++----- fs/smb/server/smb2pdu.c | 9 ++++++++- fs/smb/server/transport_ipc.c | 12 ++++++++++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 6fa025374f2f..1c181ef99929 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -147,14 +147,11 @@ void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id) int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id) { struct ksmbd_session_rpc *entry; - int method; - down_read(&sess->rpc_lock); + lockdep_assert_held(&sess->rpc_lock); entry = xa_load(&sess->rpc_handle_list, id); - method = entry ? entry->method : 0; - up_read(&sess->rpc_lock); - return method; + return entry ? entry->method : 0; } void ksmbd_session_destroy(struct ksmbd_session *sess) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index b731d9b09408..f901ae18e68a 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -4626,8 +4626,15 @@ static int smb2_get_info_file_pipe(struct ksmbd_session *sess, * pipe without opening it, checking error condition here */ id = req->VolatileFileId; - if (!ksmbd_session_rpc_method(sess, id)) + + lockdep_assert_not_held(&sess->rpc_lock); + + down_read(&sess->rpc_lock); + if (!ksmbd_session_rpc_method(sess, id)) { + up_read(&sess->rpc_lock); return -ENOENT; + } + up_read(&sess->rpc_lock); ksmbd_debug(SMB, "FileInfoClass %u, FileId 0x%llx\n", req->FileInfoClass, req->VolatileFileId); diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index 2aa1b29bea08..46f87fd1ce1c 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -825,6 +825,9 @@ struct ksmbd_rpc_command *ksmbd_rpc_write(struct ksmbd_session *sess, int handle if (!msg) return NULL; + lockdep_assert_not_held(&sess->rpc_lock); + + down_read(&sess->rpc_lock); msg->type = KSMBD_EVENT_RPC_REQUEST; req = (struct ksmbd_rpc_command *)msg->payload; req->handle = handle; @@ -833,6 +836,7 @@ struct ksmbd_rpc_command *ksmbd_rpc_write(struct ksmbd_session *sess, int handle req->flags |= KSMBD_RPC_WRITE_METHOD; req->payload_sz = payload_sz; memcpy(req->payload, payload, payload_sz); + up_read(&sess->rpc_lock); resp = ipc_msg_send_request(msg, req->handle); ipc_msg_free(msg); @@ -849,6 +853,9 @@ struct ksmbd_rpc_command *ksmbd_rpc_read(struct ksmbd_session *sess, int handle) if (!msg) return NULL; + lockdep_assert_not_held(&sess->rpc_lock); + + down_read(&sess->rpc_lock); msg->type = KSMBD_EVENT_RPC_REQUEST; req = (struct ksmbd_rpc_command *)msg->payload; req->handle = handle; @@ -856,6 +863,7 @@ struct ksmbd_rpc_command *ksmbd_rpc_read(struct ksmbd_session *sess, int handle) req->flags |= rpc_context_flags(sess); req->flags |= KSMBD_RPC_READ_METHOD; req->payload_sz = 0; + up_read(&sess->rpc_lock); resp = ipc_msg_send_request(msg, req->handle); ipc_msg_free(msg); @@ -876,6 +884,9 @@ struct ksmbd_rpc_command *ksmbd_rpc_ioctl(struct ksmbd_session *sess, int handle if (!msg) return NULL; + lockdep_assert_not_held(&sess->rpc_lock); + + down_read(&sess->rpc_lock); msg->type = KSMBD_EVENT_RPC_REQUEST; req = (struct ksmbd_rpc_command *)msg->payload; req->handle = handle; @@ -884,6 +895,7 @@ struct ksmbd_rpc_command *ksmbd_rpc_ioctl(struct ksmbd_session *sess, int handle req->flags |= 
KSMBD_RPC_IOCTL_METHOD; req->payload_sz = payload_sz; memcpy(req->payload, payload, payload_sz); + up_read(&sess->rpc_lock); resp = ipc_msg_send_request(msg, req->handle); ipc_msg_free(msg); -- cgit v1.2.3 From b0432201a11b3caaeca6c03f2b3e399275b2e489 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:30 +0200 Subject: smb: client: let destroy_mr_list() keep smbdirect_mr_io memory if registered If a smbdirect_mr_io structure is still visible to callers of smbd_register_mr() we can't free the related memory when the connection is disconnected! Otherwise smbd_deregister_mr() will crash. Now we use a mutex and refcounting in order to keep the memory around if the connection is disconnected. It means smbd_deregister_mr() can be called at any later time to free the memory, which is no longer referenced by nor referencing the connection. It also means smbd_destroy() no longer needs to wait for mr_io.used.count to become 0. Fixes: 050b8c374019 ("smbd: Make upper layer decide when to destroy the transport") Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 146 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 127 insertions(+), 19 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index c3330e43488f..77de85d7cdc3 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1624,19 +1624,7 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "free receive buffers\n"); destroy_receive_buffers(sc); - /* - * For performance reasons, memory registration and deregistration - * are not locked by srv_mutex. It is possible some processes are - * blocked on transport srv_mutex while holding memory registration. - * Release the transport srv_mutex to allow them to hit the failure - * path when sending data, and then release memory registrations. - */ log_rdma_event(INFO, "freeing mr list\n"); - while (atomic_read(&sc->mr_io.used.count)) { - cifs_server_unlock(server); - msleep(1000); - cifs_server_lock(server); - } destroy_mr_list(sc); ib_free_cq(sc->ib.send_cq); @@ -2352,6 +2340,46 @@ static void smbd_mr_recovery_work(struct work_struct *work) } } +static void smbd_mr_disable_locked(struct smbdirect_mr_io *mr) +{ + struct smbdirect_socket *sc = mr->socket; + + lockdep_assert_held(&mr->mutex); + + if (mr->state == SMBDIRECT_MR_DISABLED) + return; + + if (mr->mr) + ib_dereg_mr(mr->mr); + if (mr->sgt.nents) + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + kfree(mr->sgt.sgl); + + mr->mr = NULL; + mr->sgt.sgl = NULL; + mr->sgt.nents = 0; + + mr->state = SMBDIRECT_MR_DISABLED; +} + +static void smbd_mr_free_locked(struct kref *kref) +{ + struct smbdirect_mr_io *mr = + container_of(kref, struct smbdirect_mr_io, kref); + + lockdep_assert_held(&mr->mutex); + + /* + * smbd_mr_disable_locked() should already be called!
+ */ + if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED)) + smbd_mr_disable_locked(mr); + + mutex_unlock(&mr->mutex); + mutex_destroy(&mr->mutex); + kfree(mr); +} + static void destroy_mr_list(struct smbdirect_socket *sc) { struct smbdirect_mr_io *mr, *tmp; @@ -2365,13 +2393,31 @@ static void destroy_mr_list(struct smbdirect_socket *sc) spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); list_for_each_entry_safe(mr, tmp, &all_list, list) { - if (mr->mr) - ib_dereg_mr(mr->mr); - if (mr->sgt.nents) - ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); - kfree(mr->sgt.sgl); + mutex_lock(&mr->mutex); + + smbd_mr_disable_locked(mr); list_del(&mr->list); - kfree(mr); + mr->socket = NULL; + + /* + * No kref_put_mutex() as it's already locked. + * + * If smbd_mr_free_locked() is called + * and the mutex is unlocked and mr is gone, + * in that case kref_put() returned 1. + * + * If kref_put() returned 0 we know that + * smbd_mr_free_locked() didn't + * run. Not by us nor by anyone else, as we + * still hold the mutex, so we need to unlock. + * + * If the mr is still registered it will + * be dangling (detached from the connection + * waiting for smbd_deregister_mr() to be + * called in order to free the memory. + */ + if (!kref_put(&mr->kref, smbd_mr_free_locked)) + mutex_unlock(&mr->mutex); } } @@ -2402,6 +2448,9 @@ static int allocate_mr_list(struct smbdirect_socket *sc) goto kzalloc_mr_failed; } + kref_init(&mr->kref); + mutex_init(&mr->mutex); + mr->mr = ib_alloc_mr(sc->ib.pd, sc->mr_io.type, sp->max_frmr_depth); @@ -2434,6 +2483,7 @@ static int allocate_mr_list(struct smbdirect_socket *sc) kcalloc_sgl_failed: ib_dereg_mr(mr->mr); ib_alloc_mr_failed: + mutex_destroy(&mr->mutex); kfree(mr); kzalloc_mr_failed: destroy_mr_list(sc); @@ -2471,6 +2521,7 @@ again: list_for_each_entry(ret, &sc->mr_io.all.list, list) { if (ret->state == SMBDIRECT_MR_READY) { ret->state = SMBDIRECT_MR_REGISTERED; + kref_get(&ret->kref); spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); atomic_dec(&sc->mr_io.ready.count); atomic_inc(&sc->mr_io.used.count); @@ -2535,6 +2586,8 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, return NULL; } + mutex_lock(&mr->mutex); + mr->dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; mr->need_invalidate = need_invalidate; mr->sgt.nents = 0; @@ -2578,8 +2631,16 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, * on the next ib_post_send when we actually send I/O to remote peer */ rc = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL); - if (!rc) + if (!rc) { + /* + * get_mr() gave us a reference + * via kref_get(&mr->kref), we keep that and let + * the caller use smbd_deregister_mr() + * to remove it again. + */ + mutex_unlock(&mr->mutex); return mr; + } log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n", rc, reg_wr->key); @@ -2596,6 +2657,25 @@ dma_map_error: smbd_disconnect_rdma_connection(sc); + /* + * get_mr() gave us a reference + * via kref_get(&mr->kref), we need to remove it again + * on error. + * + * No kref_put_mutex() as it's already locked. + * + * If smbd_mr_free_locked() is called + * and the mutex is unlocked and mr is gone, + * in that case kref_put() returned 1. + * + * If kref_put() returned 0 we know that + * smbd_mr_free_locked() didn't + * run. Not by us nor by anyone else, as we + * still hold the mutex, so we need to unlock.
+ */ + if (!kref_put(&mr->kref, smbd_mr_free_locked)) + mutex_unlock(&mr->mutex); + return NULL; } @@ -2624,6 +2704,15 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr) { struct smbdirect_socket *sc = mr->socket; + mutex_lock(&mr->mutex); + if (mr->state == SMBDIRECT_MR_DISABLED) + goto put_kref; + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + smbd_mr_disable_locked(mr); + goto put_kref; + } + if (mr->need_invalidate) { struct ib_send_wr *wr = &mr->inv_wr; int rc; @@ -2640,6 +2729,7 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr) rc = ib_post_send(sc->ib.qp, wr, NULL); if (rc) { log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); + smbd_mr_disable_locked(mr); smbd_disconnect_rdma_connection(sc); goto done; } @@ -2671,6 +2761,24 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr) done: if (atomic_dec_and_test(&sc->mr_io.used.count)) wake_up(&sc->mr_io.cleanup.wait_queue); + +put_kref: + /* + * No kref_put_mutex() as it's already locked. + * + * If smbd_mr_free_locked() is called + * and the mutex is unlocked and mr is gone, + * in that case kref_put() returned 1. + * + * If kref_put() returned 0 we know that + * smbd_mr_free_locked() didn't + * run. Not by us nor by anyone else, as we + * still hold the mutex, so we need to unlock + * and keep the mr in SMBDIRECT_MR_READY or + * SMBDIRECT_MR_ERROR state. + */ + if (!kref_put(&mr->kref, smbd_mr_free_locked)) + mutex_unlock(&mr->mutex); } static bool smb_set_sge(struct smb_extract_to_rdma *rdma, -- cgit v1.2.3 From d877470b59910b5c50383d634dda3782386bba51 Mon Sep 17 00:00:00 2001 From: ZhangGuoDong Date: Mon, 13 Oct 2025 00:17:30 +0800 Subject: smb: move some duplicate definitions to common/cifsglob.h In order to maintain the code more easily, move duplicate definitions to new common header file. Co-developed-by: ChenXiaoSong Signed-off-by: ChenXiaoSong Signed-off-by: ZhangGuoDong Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 19 +------------------ fs/smb/common/cifsglob.h | 30 ++++++++++++++++++++++++++++++ fs/smb/server/smb_common.h | 14 +------------- 3 files changed, 32 insertions(+), 31 deletions(-) create mode 100644 fs/smb/common/cifsglob.h diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 8f6f567d7474..c5034cf9ac9e 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -24,6 +24,7 @@ #include "cifsacl.h" #include #include +#include "../common/cifsglob.h" #include "../common/smb2pdu.h" #include "smb2pdu.h" #include @@ -702,12 +703,6 @@ get_rfc1002_length(void *buf) return be32_to_cpu(*((__be32 *)buf)) & 0xffffff; } -static inline void -inc_rfc1001_len(void *buf, int count) -{ - be32_add_cpu((__be32 *)buf, count); -} - struct TCP_Server_Info { struct list_head tcp_ses_list; struct list_head smb_ses_list; @@ -1021,8 +1016,6 @@ compare_mid(__u16 mid, const struct smb_hdr *smb) #define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) #define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) -#define CIFS_DEFAULT_IOSIZE (1024 * 1024) - /* * Windows only supports a max of 60kb reads and 65535 byte writes. Default to * those values when posix extensions aren't in force. 
In actuality here, we @@ -2148,30 +2141,20 @@ extern mempool_t cifs_io_request_pool; extern mempool_t cifs_io_subrequest_pool; /* Operations for different SMB versions */ -#define SMB1_VERSION_STRING "1.0" -#define SMB20_VERSION_STRING "2.0" #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY extern struct smb_version_operations smb1_operations; extern struct smb_version_values smb1_values; extern struct smb_version_operations smb20_operations; extern struct smb_version_values smb20_values; #endif /* CIFS_ALLOW_INSECURE_LEGACY */ -#define SMB21_VERSION_STRING "2.1" extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; -#define SMBDEFAULT_VERSION_STRING "default" extern struct smb_version_values smbdefault_values; -#define SMB3ANY_VERSION_STRING "3" extern struct smb_version_values smb3any_values; -#define SMB30_VERSION_STRING "3.0" extern struct smb_version_operations smb30_operations; extern struct smb_version_values smb30_values; -#define SMB302_VERSION_STRING "3.02" -#define ALT_SMB302_VERSION_STRING "3.0.2" /*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */ extern struct smb_version_values smb302_values; -#define SMB311_VERSION_STRING "3.1.1" -#define ALT_SMB311_VERSION_STRING "3.11" extern struct smb_version_operations smb311_operations; extern struct smb_version_values smb311_values; diff --git a/fs/smb/common/cifsglob.h b/fs/smb/common/cifsglob.h new file mode 100644 index 000000000000..00fd215e3eb5 --- /dev/null +++ b/fs/smb/common/cifsglob.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * + * Copyright (C) International Business Machines Corp., 2002,2008 + * Author(s): Steve French (sfrench@us.ibm.com) + * Jeremy Allison (jra@samba.org) + * + */ +#ifndef _COMMON_CIFS_GLOB_H +#define _COMMON_CIFS_GLOB_H + +static inline void inc_rfc1001_len(void *buf, int count) +{ + be32_add_cpu((__be32 *)buf, count); +} + +#define SMB1_VERSION_STRING "1.0" +#define SMB20_VERSION_STRING "2.0" +#define SMB21_VERSION_STRING "2.1" +#define SMBDEFAULT_VERSION_STRING "default" +#define SMB3ANY_VERSION_STRING "3" +#define SMB30_VERSION_STRING "3.0" +#define SMB302_VERSION_STRING "3.02" +#define ALT_SMB302_VERSION_STRING "3.0.2" +#define SMB311_VERSION_STRING "3.1.1" +#define ALT_SMB311_VERSION_STRING "3.11" + +#define CIFS_DEFAULT_IOSIZE (1024 * 1024) + +#endif /* _COMMON_CIFS_GLOB_H */ diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index d742ba754348..863716207a0d 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -10,6 +10,7 @@ #include "glob.h" #include "nterr.h" +#include "../common/cifsglob.h" #include "../common/smb2pdu.h" #include "smb2pdu.h" @@ -26,16 +27,8 @@ #define SMB311_PROT 6 #define BAD_PROT 0xFFFF -#define SMB1_VERSION_STRING "1.0" -#define SMB20_VERSION_STRING "2.0" -#define SMB21_VERSION_STRING "2.1" -#define SMB30_VERSION_STRING "3.0" -#define SMB302_VERSION_STRING "3.02" -#define SMB311_VERSION_STRING "3.1.1" - #define SMB_ECHO_INTERVAL (60 * HZ) -#define CIFS_DEFAULT_IOSIZE (64 * 1024) #define MAX_CIFS_SMALL_BUFFER_SIZE 448 /* big enough for most */ #define MAX_STREAM_PROT_LEN 0x00FFFFFF @@ -464,9 +457,4 @@ static inline unsigned int get_rfc1002_len(void *buf) { return be32_to_cpu(*((__be32 *)buf)) & 0xffffff; } - -static inline void inc_rfc1001_len(void *buf, int count) -{ - be32_add_cpu((__be32 *)buf, count); -} #endif /* __SMB_COMMON_H__ */ -- cgit v1.2.3 From dc96cefef0d3032c69e46a21b345c60e56b18934 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 
15 Oct 2025 09:48:27 +0800 Subject: blk-mq: fix stale tag depth for shared sched tags in blk_mq_update_nr_requests() Commit 7f2799c546db ("blk-mq: cleanup shared tags case in blk_mq_update_nr_requests()") moves blk_mq_tag_update_sched_shared_tags() before q->nr_requests is updated, however, it's still using the old q->nr_requests to resize tag depth. Fix this problem by passing in expected new tag depth. Fixes: 7f2799c546db ("blk-mq: cleanup shared tags case in blk_mq_update_nr_requests()") Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Reviewed-by: Nilay Shroff Reported-by: Chris Mason Link: https://lore.kernel.org/linux-block/20251014130507.4187235-2-clm@meta.com/ Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 2 +- block/blk-mq-tag.c | 5 +++-- block/blk-mq.c | 2 +- block/blk-mq.h | 3 ++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index d06bb137a743..e0bed16485c3 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -557,7 +557,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e, if (blk_mq_is_shared_tags(flags)) { /* Shared tags are stored at index 0 in @et->tags. */ q->sched_shared_tags = et->tags[0]; - blk_mq_tag_update_sched_shared_tags(q); + blk_mq_tag_update_sched_shared_tags(q, et->nr_requests); } queue_for_each_hw_ctx(q, hctx, i) { diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index c7a4d4b9cc87..5b664dbdf655 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -622,10 +622,11 @@ void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, unsigned int size sbitmap_queue_resize(&tags->bitmap_tags, size - set->reserved_tags); } -void blk_mq_tag_update_sched_shared_tags(struct request_queue *q) +void blk_mq_tag_update_sched_shared_tags(struct request_queue *q, + unsigned int nr) { sbitmap_queue_resize(&q->sched_shared_tags->bitmap_tags, - q->nr_requests - q->tag_set->reserved_tags); + nr - q->tag_set->reserved_tags); } /** diff --git a/block/blk-mq.c b/block/blk-mq.c index 09f579414161..d626d32f6e57 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4941,7 +4941,7 @@ struct elevator_tags *blk_mq_update_nr_requests(struct request_queue *q, * tags can't grow, see blk_mq_alloc_sched_tags(). */ if (q->elevator) - blk_mq_tag_update_sched_shared_tags(q); + blk_mq_tag_update_sched_shared_tags(q, nr); else blk_mq_tag_resize_shared_tags(set, nr); } else if (!q->elevator) { diff --git a/block/blk-mq.h b/block/blk-mq.h index af42dc018808..c4fccdeb5441 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -186,7 +186,8 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags); void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, unsigned int size); -void blk_mq_tag_update_sched_shared_tags(struct request_queue *q); +void blk_mq_tag_update_sched_shared_tags(struct request_queue *q, + unsigned int nr); void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn, -- cgit v1.2.3 From 08823e89e3e269bf4c4a20b4c24a8119920cc7a4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 15 Oct 2025 18:30:39 +0800 Subject: block: Remove elevator_lock usage from blkg_conf frozen operations Remove the acquisition and release of q->elevator_lock in the blkg_conf_open_bdev_frozen() and blkg_conf_exit_frozen() functions. 
The elevator lock is no longer needed in these code paths since commit 78c271344b6f ("block: move wbt_enable_default() out of queue freezing from sched ->exit()"), which introduced `disk->rqos_state_mutex` for protecting wbt state changes, so it is no longer necessary to abuse elevator_lock for this purpose. This change helps to solve the lockdep warning reported by Yu Kuai [1]. Passes blktests/throtl with lockdep enabled. Links: https://lore.kernel.org/linux-block/e5e7ac3f-2063-473a-aafb-4d8d43e5576e@yukuai.org.cn/ [1] Fixes: 78c271344b6f ("block: move wbt_enable_default() out of queue freezing from sched ->exit()") Signed-off-by: Ming Lei Reviewed-by: Nilay Shroff Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f93de34fe87d..3cffb68ba5d8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -812,8 +812,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) } /* * Similar to blkg_conf_open_bdev, but additionally freezes the queue, - * acquires q->elevator_lock, and ensures the correct locking order - * between q->elevator_lock and q->rq_qos_mutex. + * ensures the correct locking order between freeze queue and q->rq_qos_mutex. * * This function returns negative error on failure. On success it returns * memflags which must be saved and later passed to blkg_conf_exit_frozen @@ -834,13 +833,11 @@ unsigned long __must_check blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx) * At this point, we haven’t started protecting anything related to QoS, * so we release q->rq_qos_mutex here, which was first acquired in blkg_ * conf_open_bdev. Later, we re-acquire q->rq_qos_mutex after freezing - * the queue and acquiring q->elevator_lock to maintain the correct - * locking order. + * the queue to maintain the correct locking order. */ mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex); memflags = blk_mq_freeze_queue(ctx->bdev->bd_queue); - mutex_lock(&ctx->bdev->bd_queue->elevator_lock); mutex_lock(&ctx->bdev->bd_queue->rq_qos_mutex); return memflags; @@ -995,9 +992,8 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx) EXPORT_SYMBOL_GPL(blkg_conf_exit); /* - * Similar to blkg_conf_exit, but also unfreezes the queue and releases - * q->elevator_lock. Should be used when blkg_conf_open_bdev_frozen - * is used to open the bdev. + * Similar to blkg_conf_exit, but also unfreezes the queue. Should be used + * when blkg_conf_open_bdev_frozen is used to open the bdev. */ void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags) { @@ -1005,7 +1001,6 @@ void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags) struct request_queue *q = ctx->bdev->bd_queue; blkg_conf_exit(ctx); - mutex_unlock(&q->elevator_lock); blk_mq_unfreeze_queue(q, memflags); } } -- cgit v1.2.3 From be7cab44ed099566c605a8dac686c3254db01b35 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 15 Oct 2025 13:07:23 +0100 Subject: io_uring: protect mem region deregistration io_create_region_mmap_safe() protects publishing of a region against concurrent mmap calls; however, we should also protect against it when removing a region. There is a gap in io_register_mem_region() where it safely publishes a region, but then copy_to_user goes wrong and it unsafely frees the region.
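To illustrate the gap being closed: the hazard has the classic publish-then-unlocked-free shape. Below is a reduced userspace sketch, with a pthread mutex standing in for ctx->mmap_lock and all names (publish_region, teardown_on_error) purely illustrative; it is not the io_uring code itself, only the locking pattern that the one-line fix restores:

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	static pthread_mutex_t mmap_lock = PTHREAD_MUTEX_INITIALIZER;
	static int *published_region;	/* stand-in for ctx->param_region */

	static void publish_region(void)
	{
		int *r = malloc(sizeof(*r));

		if (!r)
			return;
		*r = 42;
		pthread_mutex_lock(&mmap_lock);
		published_region = r;	/* publishing is already done safely */
		pthread_mutex_unlock(&mmap_lock);
	}

	static void teardown_on_error(void)
	{
		/* the fix: free under the same lock, so a concurrent "mmap"
		 * can never observe a region that is being freed */
		pthread_mutex_lock(&mmap_lock);
		free(published_region);
		published_region = NULL;
		pthread_mutex_unlock(&mmap_lock);
	}

	static void *concurrent_mmap(void *arg)
	{
		pthread_mutex_lock(&mmap_lock);
		if (published_region)
			printf("mapped value %d\n", *published_region);
		pthread_mutex_unlock(&mmap_lock);
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		publish_region();
		pthread_create(&t, NULL, concurrent_mmap, NULL);
		teardown_on_error();	/* models the copy_to_user() failure path */
		pthread_join(t, NULL);
		return 0;
	}

Without the lock in teardown_on_error(), the reader could pass its NULL check and then dereference memory freed concurrently; taking the lock around the free closes exactly that window.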
Cc: stable@vger.kernel.org Fixes: 087f997870a94 ("io_uring/memmap: implement mmap for regions") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/register.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/register.c b/io_uring/register.c index 43f04c47522c..58d43d624856 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -613,6 +613,7 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg) if (ret) return ret; if (copy_to_user(rd_uptr, &rd, sizeof(rd))) { + guard(mutex)(&ctx->mmap_lock); io_free_region(ctx, &ctx->param_region); return -EFAULT; } -- cgit v1.2.3 From 437c23357d897f5b5b7d297c477da44b56654d46 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 15 Oct 2025 13:10:31 +0100 Subject: io_uring: fix unexpected placement on same size resizing There might be many reasons why a user is resizing a ring, e.g. moving to huge pages or for some memory compaction using IORING_SETUP_NO_MMAP. Don't bypass resizing: the user will definitely be surprised to see 0 returned while the rings weren't actually moved to a new place. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/register.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/io_uring/register.c b/io_uring/register.c index 58d43d624856..2e4717f1357c 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -421,13 +421,6 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) if (unlikely(ret)) return ret; - /* nothing to do, but copy params back */ - if (p.sq_entries == ctx->sq_entries && p.cq_entries == ctx->cq_entries) { - if (copy_to_user(arg, &p, sizeof(p))) - return -EFAULT; - return 0; - } - size = rings_size(p.flags, p.sq_entries, p.cq_entries, &sq_array_offset); if (size == SIZE_MAX) -- cgit v1.2.3 From 95355766e5871e9cdc574be5a3b115392ad33aea Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Mon, 22 Sep 2025 13:27:25 +0300 Subject: drm/i915/psr: Deactivate PSR only on LNL and when selective fetch enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using intel_psr_exit in frontbuffer flush on older platforms seems to be causing problems. Sending a single full frame update using intel_psr_force_update is anyway more optimal compared to PSR deactivate/activate -> move back to this approach on PSR1, PSR HW tracking and Panel Replay full frame update, and use deactivate/activate only on LunarLake and only when selective fetch is enabled. Tested-by: Lemen Tested-by: Koos Vriezen Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14946 Signed-off-by: Jouni Högander Reviewed-by: Mika Kahola Link: https://lore.kernel.org/r/20250922102725.2752742-1-jouni.hogander@intel.com (cherry picked from commit 924adb0bbdd8fef25fd229c76e3f602c3e8752ee) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_psr.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 01bf304c705f..10eb93a34cf2 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -3402,6 +3402,7 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) struct intel_display *display = to_intel_display(intel_dp); if (DISPLAY_VER(display) < 20 && intel_dp->psr.psr2_sel_fetch_enabled) { + /* Selective fetch prior LNL */ if (intel_dp->psr.psr2_sel_fetch_cff_enabled) { /* can we turn CFF off?
*/ if (intel_dp->psr.busy_frontbuffer_bits == 0) @@ -3420,12 +3421,19 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) intel_psr_configure_full_frame_update(intel_dp); intel_psr_force_update(intel_dp); + } else if (!intel_dp->psr.psr2_sel_fetch_enabled) { + /* + * PSR1 on all platforms + * PSR2 HW tracking + * Panel Replay Full frame update + */ + intel_psr_force_update(intel_dp); } else { + /* Selective update LNL onwards */ intel_psr_exit(intel_dp); } - if ((!intel_dp->psr.psr2_sel_fetch_enabled || DISPLAY_VER(display) >= 20) && - !intel_dp->psr.busy_frontbuffer_bits) + if (!intel_dp->psr.active && !intel_dp->psr.busy_frontbuffer_bits) queue_work(display->wq.unordered, &intel_dp->psr.work); } -- cgit v1.2.3 From 641bcf8731d21b56760e3646a39a65f471e9efd1 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 10 Oct 2025 17:20:21 +0100 Subject: drm/xe/migrate: don't misalign current bytes If current bytes exceeds the max copy size, ensure the clamped size still accounts for the XE_CACHELINE_BYTES alignment; otherwise we trigger the assert in xe_migrate_vram with the size now being out of alignment. Fixes: 8c2d61e0e916 ("drm/xe/migrate: don't overflow max copy size") Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6212 Signed-off-by: Matthew Auld Cc: Stuart Summers Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251010162020.190962-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 4ca48dd1cfd8..3112c966c67d 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -2176,7 +2176,9 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, if (current_bytes & ~PAGE_MASK) { int pitch = 4; - current_bytes = min_t(int, current_bytes, S16_MAX * pitch); + current_bytes = min_t(int, current_bytes, + round_down(S16_MAX * pitch, + XE_CACHELINE_BYTES)); } __fence = xe_migrate_vram(m, current_bytes, -- cgit v1.2.3 From a10b4a69c7f8f596d2c5218fbe84430734fab3b2 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 10 Oct 2025 16:24:58 +0100 Subject: drm/xe/evict: drop bogus assert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This assert can trigger here with non-pin_map users that select LATE_RESTORE, since the vmap is allowed to be NULL given that save/restore can now use the blitter instead. The check here doesn't seem to have much value anymore: since we no longer move pinned memory, any existing vmap is left well alone and doesn't need to be recreated upon restore. Just drop the assert here.
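To make the now-relaxed invariant concrete, here is a reduced userspace sketch; the struct and field names are illustrative stand-ins, not the driver's types:

	#include <stdbool.h>
	#include <stdio.h>

	struct fake_bo {
		bool late_restore;	/* stand-in for XE_BO_FLAG_PINNED_LATE_RESTORE */
		void *vmap;		/* may legitimately stay NULL: blitter restores */
	};

	static void restore(const struct fake_bo *bo)
	{
		/*
		 * The removed check encoded "late_restore implies vmap != NULL".
		 * That held while restore always went through a CPU mapping;
		 * with the blitter path a NULL vmap is a valid state, so the
		 * assert rejected states that are now correct.
		 */
		printf("restored, vmap %s\n", bo->vmap ? "present" : "absent (valid)");
	}

	int main(void)
	{
		struct fake_bo bo = { .late_restore = true, .vmap = NULL };

		restore(&bo);
		return 0;
	}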
Fixes: 86f69c26113c ("drm/xe: use backup object for pinned save/restore") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6213 Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Thomas Hellström Link: https://lore.kernel.org/r/20251010152457.177884-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_bo_evict.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 1a12675b2ea9..7661fca7f278 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -191,7 +191,6 @@ int xe_bo_evict_all(struct xe_device *xe) static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) { - struct xe_device *xe = xe_bo_device(bo); int ret; ret = xe_bo_restore_pinned(bo); @@ -210,13 +209,6 @@ static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) } } - /* - * We expect validate to trigger a move VRAM and our move code - * should setup the iosys map. - */ - xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) || - !iosys_map_is_null(&bo->vmap)); - return 0; } -- cgit v1.2.3 From 0f5878834d6ce97426219b64c02a2c4081419d53 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 13 Oct 2025 02:14:22 +0200 Subject: rust: bitmap: clean Rust 1.92.0 `unused_unsafe` warning Starting with Rust 1.92.0 (expected 2025-12-11), Rust allows safely taking the address of a union field [1][2]: CLIPPY L rust/kernel.o error: unnecessary `unsafe` block --> rust/kernel/bitmap.rs:169:13 | 169 | unsafe { core::ptr::addr_of!(self.repr.bitmap) } | ^^^^^^ unnecessary `unsafe` block | = note: `-D unused-unsafe` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(unused_unsafe)]` error: unnecessary `unsafe` block --> rust/kernel/bitmap.rs:185:13 | 185 | unsafe { core::ptr::addr_of_mut!(self.repr.bitmap) } | ^^^^^^ unnecessary `unsafe` block Thus allow both instances to clean the warning in newer compilers. Link: https://github.com/rust-lang/rust/issues/141264 [1] Link: https://github.com/rust-lang/rust/pull/141469 [2] Signed-off-by: Miguel Ojeda Reviewed-by: Alice Ryhl Signed-off-by: Yury Norov (NVIDIA) --- rust/kernel/bitmap.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/kernel/bitmap.rs b/rust/kernel/bitmap.rs index f45915694454..711b8368b38f 100644 --- a/rust/kernel/bitmap.rs +++ b/rust/kernel/bitmap.rs @@ -166,6 +166,7 @@ impl core::ops::Deref for BitmapVec { fn deref(&self) -> &Bitmap { let ptr = if self.nbits <= BITS_PER_LONG { // SAFETY: Bitmap is represented inline. + #[allow(unused_unsafe, reason = "Safe since Rust 1.92.0")] unsafe { core::ptr::addr_of!(self.repr.bitmap) } } else { // SAFETY: Bitmap is represented as array of `unsigned long`. @@ -182,6 +183,7 @@ impl core::ops::DerefMut for BitmapVec { fn deref_mut(&mut self) -> &mut Bitmap { let ptr = if self.nbits <= BITS_PER_LONG { // SAFETY: Bitmap is represented inline. + #[allow(unused_unsafe, reason = "Safe since Rust 1.92.0")] unsafe { core::ptr::addr_of_mut!(self.repr.bitmap) } } else { // SAFETY: Bitmap is represented as array of `unsigned long`. -- cgit v1.2.3 From 7e85ac9da1acc591bd5269f2b890ed1994c42e96 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Mon, 13 Oct 2025 09:34:04 +0200 Subject: PM / devfreq: rockchip-dfi: switch to FIELD_PREP_WM16 macro The era of hand-rolled HIWORD_UPDATE macros is over, at least for those drivers that use constant masks. Like many other Rockchip drivers, rockchip-dfi brings with it its own HIWORD_UPDATE macro.
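For background, the "hiword" idiom these macros implement treats the upper 16 bits of a 32-bit register write as a per-bit write-enable mask for the lower 16 bits, so single bits can be changed without a read-modify-write cycle. A hedged, generic sketch of the pattern (the helper name and its shift handling are illustrative, not the driver's macro):

	#include <stdint.h>
	#include <stdio.h>

	/* high half: which low bits the write may change;
	 * low half:  the new value for those bits */
	static uint32_t hiword_update(uint16_t val, uint16_t mask, unsigned int shift)
	{
		return ((uint32_t)mask << 16) | (((uint32_t)val << shift) & mask);
	}

	int main(void)
	{
		/* set bit 1 (e.g. a SOFTWARE_EN-style bit), touch nothing else */
		printf("0x%08x\n", hiword_update(1, 1 << 1, 1)); /* 0x00020002 */
		return 0;
	}

The driver's own macro, shown removed in the diff below, is a minimal local form of this.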
This variant doesn't shift the value (and like the others, doesn't do any checking). Remove it, and replace instances of it with hw_bitfield.h's FIELD_PREP_WM16. Since FIELD_PREP_WM16 requires contiguous masks and shifts the value for us, some reshuffling of definitions needs to happen. This gives us better compile-time error checking, and in my opinion, nicer code. Tested on an RK3568 ODROID-M1 board (LPDDR4X at 1560 MHz), an RK3588 Radxa ROCK 5B board (LPDDR4X at 2112 MHz) and an RK3588 Radxa ROCK 5T board (LPDDR5 at 2400 MHz). perf measurements were consistent with the measurements of stress-ng --stream in all cases. Signed-off-by: Nicolas Frattaroli Signed-off-by: Yury Norov (NVIDIA) --- drivers/devfreq/event/rockchip-dfi.c | 45 ++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c index 5a2c9badcc64..5e6e7e900bda 100644 --- a/drivers/devfreq/event/rockchip-dfi.c +++ b/drivers/devfreq/event/rockchip-dfi.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -30,8 +31,6 @@ #define DMC_MAX_CHANNELS 4 -#define HIWORD_UPDATE(val, mask) ((val) | (mask) << 16) - /* DDRMON_CTRL */ #define DDRMON_CTRL 0x04 #define DDRMON_CTRL_LPDDR5 BIT(6) #define DDRMON_CTRL_DDR4 BIT(5) #define DDRMON_CTRL_LPDDR4 BIT(4) #define DDRMON_CTRL_LPDDR23 BIT(2) #define DDRMON_CTRL_SOFTWARE_EN BIT(1) #define DDRMON_CTRL_TIMER_CNT_EN BIT(0) -#define DDRMON_CTRL_DDR_TYPE_MASK (DDRMON_CTRL_LPDDR5 | \ - DDRMON_CTRL_DDR4 | \ - DDRMON_CTRL_LPDDR4 | \ - DDRMON_CTRL_LPDDR23) #define DDRMON_CTRL_LP5_BANK_MODE_MASK GENMASK(8, 7) #define DDRMON_CH0_WR_NUM 0x20 @@ -124,27 +119,31 @@ struct rockchip_dfi { unsigned int count_multiplier; /* number of data clocks per count */ }; -static int rockchip_dfi_ddrtype_to_ctrl(struct rockchip_dfi *dfi, u32 *ctrl, - u32 *mask) +static int rockchip_dfi_ddrtype_to_ctrl(struct rockchip_dfi *dfi, u32 *ctrl) { u32 ddrmon_ver; - *mask = DDRMON_CTRL_DDR_TYPE_MASK; - switch (dfi->ddr_type) { case ROCKCHIP_DDRTYPE_LPDDR2: case ROCKCHIP_DDRTYPE_LPDDR3: - *ctrl = DDRMON_CTRL_LPDDR23; + *ctrl = FIELD_PREP_WM16(DDRMON_CTRL_LPDDR23, 1) | + FIELD_PREP_WM16(DDRMON_CTRL_LPDDR4, 0) | + FIELD_PREP_WM16(DDRMON_CTRL_LPDDR5, 0); break; case ROCKCHIP_DDRTYPE_LPDDR4: case ROCKCHIP_DDRTYPE_LPDDR4X: - *ctrl = DDRMON_CTRL_LPDDR4; + *ctrl = FIELD_PREP_WM16(DDRMON_CTRL_LPDDR23, 0) | + FIELD_PREP_WM16(DDRMON_CTRL_LPDDR4, 1) | + FIELD_PREP_WM16(DDRMON_CTRL_LPDDR5, 0); break; case ROCKCHIP_DDRTYPE_LPDDR5: ddrmon_ver = readl_relaxed(dfi->regs); if (ddrmon_ver < 0x40) { - *ctrl = DDRMON_CTRL_LPDDR5 | dfi->lp5_bank_mode; - *mask |= DDRMON_CTRL_LP5_BANK_MODE_MASK; + *ctrl = FIELD_PREP_WM16(DDRMON_CTRL_LPDDR23, 0) | + FIELD_PREP_WM16(DDRMON_CTRL_LPDDR4, 0) | + FIELD_PREP_WM16(DDRMON_CTRL_LPDDR5, 1) | + FIELD_PREP_WM16(DDRMON_CTRL_LP5_BANK_MODE_MASK, + dfi->lp5_bank_mode); break; } @@ -172,7 +171,6 @@ static int rockchip_dfi_enable(struct rockchip_dfi *dfi) void __iomem *dfi_regs = dfi->regs; int i, ret = 0; u32 ctrl; - u32 ctrl_mask; mutex_lock(&dfi->mutex); @@ -186,7 +184,7 @@ static int rockchip_dfi_enable(struct rockchip_dfi *dfi) goto out; } - ret = rockchip_dfi_ddrtype_to_ctrl(dfi, &ctrl, &ctrl_mask); + ret = rockchip_dfi_ddrtype_to_ctrl(dfi, &ctrl); if (ret) goto out; @@ -196,15 +194,16 @@ static int rockchip_dfi_enable(struct rockchip_dfi *dfi) continue; /* clear DDRMON_CTRL setting */ - writel_relaxed(HIWORD_UPDATE(0, DDRMON_CTRL_TIMER_CNT_EN | - DDRMON_CTRL_SOFTWARE_EN | DDRMON_CTRL_HARDWARE_EN), +
writel_relaxed(FIELD_PREP_WM16(DDRMON_CTRL_TIMER_CNT_EN, 0) | + FIELD_PREP_WM16(DDRMON_CTRL_SOFTWARE_EN, 0) | + FIELD_PREP_WM16(DDRMON_CTRL_HARDWARE_EN, 0), dfi_regs + i * dfi->ddrmon_stride + DDRMON_CTRL); - writel_relaxed(HIWORD_UPDATE(ctrl, ctrl_mask), - dfi_regs + i * dfi->ddrmon_stride + DDRMON_CTRL); + writel_relaxed(ctrl, dfi_regs + i * dfi->ddrmon_stride + + DDRMON_CTRL); /* enable count, use software mode */ - writel_relaxed(HIWORD_UPDATE(DDRMON_CTRL_SOFTWARE_EN, DDRMON_CTRL_SOFTWARE_EN), + writel_relaxed(FIELD_PREP_WM16(DDRMON_CTRL_SOFTWARE_EN, 1), dfi_regs + i * dfi->ddrmon_stride + DDRMON_CTRL); if (dfi->ddrmon_ctrl_single) @@ -234,8 +233,8 @@ static void rockchip_dfi_disable(struct rockchip_dfi *dfi) if (!(dfi->channel_mask & BIT(i))) continue; - writel_relaxed(HIWORD_UPDATE(0, DDRMON_CTRL_SOFTWARE_EN), - dfi_regs + i * dfi->ddrmon_stride + DDRMON_CTRL); + writel_relaxed(FIELD_PREP_WM16(DDRMON_CTRL_SOFTWARE_EN, 0), + dfi_regs + i * dfi->ddrmon_stride + DDRMON_CTRL); if (dfi->ddrmon_ctrl_single) break; -- cgit v1.2.3 From 1f4a222b0e334540343fbb5d3eac4584a6bfe180 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 15 Oct 2025 07:57:28 -0700 Subject: Remove long-stale ext3 defconfig option Inspired by commit c065b6046b34 ("Use CONFIG_EXT4_FS instead of CONFIG_EXT3_FS in all of the defconfigs") I looked around for any other left-over EXT3 config options, and found some old defconfig files still mentioned CONFIG_EXT3_DEFAULTS_TO_ORDERED. That config option was removed a decade ago in commit c290ea01abb7 ("fs: Remove ext3 filesystem driver"). It had a good run, but let's remove it for good. Signed-off-by: Linus Torvalds --- arch/arm/configs/axm55xx_defconfig | 1 - arch/hexagon/configs/comet_defconfig | 1 - arch/sh/configs/ap325rxa_defconfig | 1 - arch/sh/configs/apsh4a3a_defconfig | 1 - arch/sh/configs/apsh4ad0a_defconfig | 1 - arch/sh/configs/ecovec24_defconfig | 1 - arch/sh/configs/edosk7760_defconfig | 1 - arch/sh/configs/espt_defconfig | 1 - arch/sh/configs/landisk_defconfig | 1 - arch/sh/configs/lboxre2_defconfig | 1 - arch/sh/configs/magicpanelr2_defconfig | 1 - arch/sh/configs/r7780mp_defconfig | 1 - arch/sh/configs/r7785rp_defconfig | 1 - arch/sh/configs/rsk7264_defconfig | 1 - arch/sh/configs/rsk7269_defconfig | 1 - arch/sh/configs/sdk7780_defconfig | 1 - arch/sh/configs/sdk7786_defconfig | 1 - arch/sh/configs/se7343_defconfig | 1 - arch/sh/configs/se7712_defconfig | 1 - arch/sh/configs/se7721_defconfig | 1 - arch/sh/configs/se7722_defconfig | 1 - arch/sh/configs/se7724_defconfig | 1 - arch/sh/configs/sh03_defconfig | 1 - arch/sh/configs/sh7763rdp_defconfig | 1 - arch/sh/configs/sh7785lcr_32bit_defconfig | 1 - arch/sh/configs/sh7785lcr_defconfig | 1 - arch/sh/configs/shx3_defconfig | 1 - arch/sh/configs/titan_defconfig | 1 - arch/sh/configs/ul2_defconfig | 1 - arch/sh/configs/urquell_defconfig | 1 - arch/sparc/configs/sparc64_defconfig | 1 - 31 files changed, 31 deletions(-) diff --git a/arch/arm/configs/axm55xx_defconfig b/arch/arm/configs/axm55xx_defconfig index 9b263ea9a878..242a61208a0f 100644 --- a/arch/arm/configs/axm55xx_defconfig +++ b/arch/arm/configs/axm55xx_defconfig @@ -195,7 +195,6 @@ CONFIG_PL320_MBOX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=y diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig index b132752693a9..22d7f8ac58a3 100644 --- a/arch/hexagon/configs/comet_defconfig +++ 
b/arch/hexagon/configs/comet_defconfig @@ -47,7 +47,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index 336dbacd89bd..48b2e97114f9 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -82,7 +82,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/apsh4a3a_defconfig b/arch/sh/configs/apsh4a3a_defconfig index 59daf99ea745..85db9ce42d1a 100644 --- a/arch/sh/configs/apsh4a3a_defconfig +++ b/arch/sh/configs/apsh4a3a_defconfig @@ -61,7 +61,6 @@ CONFIG_LOGO=y # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index df2a669ea9d8..e8b3b720578b 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -89,7 +89,6 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig index dd7e54c451d6..fcca7cc5a75a 100644 --- a/arch/sh/configs/ecovec24_defconfig +++ b/arch/sh/configs/ecovec24_defconfig @@ -110,7 +110,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig index 711db47b65ba..98f4611ba553 100644 --- a/arch/sh/configs/edosk7760_defconfig +++ b/arch/sh/configs/edosk7760_defconfig @@ -88,7 +88,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_XIP=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_NFS_FS=y diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index f8cad1e7a333..e5d102cbff89 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -60,7 +60,6 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_AUTOFS_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index 08342ceee32e..22177aa8f961 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -94,7 +94,6 @@ CONFIG_USB_EMI26=m CONFIG_USB_SISUSBVGA=m CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_ISO9660_FS=m CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index 96a21173522d..ff992301622b 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -50,7 +50,6 @@ CONFIG_HW_RANDOM=y CONFIG_RTC_CLASS=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git 
a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig index af7f777b20be..a29fb912a242 100644 --- a/arch/sh/configs/magicpanelr2_defconfig +++ b/arch/sh/configs/magicpanelr2_defconfig @@ -65,7 +65,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_PROC_KCORE=y diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index 11f210517f76..58b792dacfec 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -75,7 +75,6 @@ CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index ae367d7a14a8..7edf18451158 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -70,7 +70,6 @@ CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig index 3aba0102304f..28a81efefb02 100644 --- a/arch/sh/configs/rsk7264_defconfig +++ b/arch/sh/configs/rsk7264_defconfig @@ -60,7 +60,6 @@ CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_VFAT_FS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y diff --git a/arch/sh/configs/rsk7269_defconfig b/arch/sh/configs/rsk7269_defconfig index f82f280fc55a..f8bfa46643ff 100644 --- a/arch/sh/configs/rsk7269_defconfig +++ b/arch/sh/configs/rsk7269_defconfig @@ -44,7 +44,6 @@ CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_VFAT_FS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index 3b51195bf1b5..311817161afb 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -103,7 +103,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index ebb3f4420ae8..2433aa5f44a8 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -162,7 +162,6 @@ CONFIG_STAGING=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_BTRFS_FS=y diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig index 6ef546ee8a32..b0baa5771c26 100644 --- a/arch/sh/configs/se7343_defconfig +++ b/arch/sh/configs/se7343_defconfig @@ -85,7 +85,6 @@ CONFIG_USB_ISP116X_HCD=y CONFIG_UIO=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_DNOTIFY is not set CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index 4cecf9c06a65..1078c286a610 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -84,7 +84,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# 
CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_DNOTIFY is not set CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index c28057b70ad7..edb9e0d2dce5 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -108,7 +108,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/se7722_defconfig b/arch/sh/configs/se7722_defconfig index 88bfd953ef89..33daa0a17a32 100644 --- a/arch/sh/configs/se7722_defconfig +++ b/arch/sh/configs/se7722_defconfig @@ -45,7 +45,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig index e1b2616ef921..d572655f842d 100644 --- a/arch/sh/configs/se7724_defconfig +++ b/arch/sh/configs/se7724_defconfig @@ -111,7 +111,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index 306e1661fbf5..3d194d81c92b 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -58,7 +58,6 @@ CONFIG_SH_WDT=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=m diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index 85ec00b7dbd2..e7b72ff377a8 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -62,7 +62,6 @@ CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_AUTOFS_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig index e860a20d3f0f..17d2471d8e51 100644 --- a/arch/sh/configs/sh7785lcr_32bit_defconfig +++ b/arch/sh/configs/sh7785lcr_32bit_defconfig @@ -114,7 +114,6 @@ CONFIG_DMADEVICES=y CONFIG_UIO=m CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig index 33c98b4a2adb..34c8fe755add 100644 --- a/arch/sh/configs/sh7785lcr_defconfig +++ b/arch/sh/configs/sh7785lcr_defconfig @@ -91,7 +91,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index 3169e4dc7004..52e7a42d66c7 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -85,7 +85,6 @@ CONFIG_RTC_DRV_SH=y CONFIG_UIO=m CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 6bff00038072..2c474645ec36 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -216,7 +216,6 @@ 
CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=m CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT4_FS_XATTR is not set CONFIG_XFS_FS=m CONFIG_FUSE_FS=m diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig index b89eb8f5cc5c..b0c2ba478353 100644 --- a/arch/sh/configs/ul2_defconfig +++ b/arch/sh/configs/ul2_defconfig @@ -67,7 +67,6 @@ CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index 60cb716ef195..e6d807f52253 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -115,7 +115,6 @@ CONFIG_RTC_DRV_SH=y CONFIG_RTC_DRV_GENERIC=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_BTRFS_FS=y CONFIG_MSDOS_FS=y diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 200640b93e05..127940aafc39 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -188,7 +188,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_PROC_KCORE=y -- cgit v1.2.3 From 0187c08058da3e7f11b356ac27e0c427d36f33f2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 14 Oct 2025 21:28:44 -0700 Subject: HID: hid-input: only ignore 0 battery events for digitizers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 581c4484769e ("HID: input: map digitizer battery usage") added handling of battery events for digitizers (typically for batteries presented in styli). Digitizers typically report correct battery levels only when the stylus is actively touching the surface, and in other cases they may report a battery level of 0. To avoid confusing consumers of the battery information, the code was added to filter out reports with 0 battery levels. However, there exist other kinds of devices that may legitimately report 0 battery levels. Fix this by filtering out 0-level reports only for digitizer usages, and continue reporting them for other kinds of devices (Smart Batteries, etc.). Reported-by: 卢国宏 Fixes: 581c4484769e ("HID: input: map digitizer battery usage") Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/hid-input.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 5d7532d79d21..e56e7de53279 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -635,7 +635,10 @@ static void hidinput_update_battery(struct hid_device *dev, unsigned int usage, return; } - if (value == 0 || value < dev->battery_min || value > dev->battery_max) + if ((usage & HID_USAGE_PAGE) == HID_UP_DIGITIZER && value == 0) + return; + + if (value < dev->battery_min || value > dev->battery_max) return; capacity = hidinput_scale_battery_capacity(dev, value); -- cgit v1.2.3 From aa4daea418ee4215dca5c8636090660c545cb233 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 8 Oct 2025 09:40:33 -0300 Subject: HID: multitouch: fix name of Stylus input devices HID_DG_PEN devices should have a suffix of "Stylus", as pointed out by commit c0ee1d571626 ("HID: hid-input: Add suffix also for HID_DG_PEN").
However, on multitouch devices, these suffixes may be overridden. Before that commit, HID_DG_PEN devices would get the "Stylus" suffix, but after that, multitouch would override them to have an "UNKNOWN" suffix. Just add HID_DG_PEN to the list of non-overridden suffixes in multitouch. Before this fix: [ 0.470981] input: ELAN9008:00 04F3:2E14 UNKNOWN as /devices/pci0000:00/0000:00:15.1/i2c_designware.1/i2c-16/i2c-ELAN9008:00/0018:04F3:2E14.0001/input/input8 ELAN9008:00 04F3:2E14 UNKNOWN After this fix: [ 0.474332] input: ELAN9008:00 04F3:2E14 Stylus as /devices/pci0000:00/0000:00:15.1/i2c_designware.1/i2c-16/i2c-ELAN9008:00/0018:04F3:2E14.0001/input/input8 ELAN9008:00 04F3:2E14 Stylus Fixes: c0ee1d571626 ("HID: hid-input: Add suffix also for HID_DG_PEN") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Mika Westerberg Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 2879e65cf303..513b8673ad8d 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1742,6 +1742,7 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) case HID_CP_CONSUMER_CONTROL: case HID_GD_WIRELESS_RADIO_CTLS: case HID_GD_SYSTEM_MULTIAXIS: + case HID_DG_PEN: /* already handled by hid core */ break; case HID_DG_TOUCHSCREEN: -- cgit v1.2.3 From 46f781e0d151844589dc2125c8cce3300546f92a Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 8 Oct 2025 16:06:58 +0200 Subject: HID: multitouch: fix sticky fingers The sticky fingers quirk (MT_QUIRK_STICKY_FINGERS) was only considering the case when slots were not released during the last report. This can be problematic if the firmware forgets to release a finger while others are still present. This was observed on the Synaptics DLL0945 touchpad found on the Dell XPS 9310 and the Dell Inspiron 5406. Fixes: 4f4001bc76fd ("HID: multitouch: fix rare Win 8 cases when the touch up event gets missing") Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 513b8673ad8d..179dc316b4b5 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -94,9 +94,8 @@ enum report_mode { TOUCHPAD_REPORT_ALL = TOUCHPAD_REPORT_BUTTONS | TOUCHPAD_REPORT_CONTACTS, }; -#define MT_IO_FLAGS_RUNNING 0 -#define MT_IO_FLAGS_ACTIVE_SLOTS 1 -#define MT_IO_FLAGS_PENDING_SLOTS 2 +#define MT_IO_SLOTS_MASK GENMASK(7, 0) /* reserve first 8 bits for slot tracking */ +#define MT_IO_FLAGS_RUNNING 32 static const bool mtrue = true; /* default for true */ static const bool mfalse; /* default for false */ @@ -172,7 +171,11 @@ struct mt_device { struct timer_list release_timer; /* to release sticky fingers */ struct hid_haptic_device *haptic; /* haptic related configuration */ struct hid_device *hdev; /* hid_device we're attached to */ - unsigned long mt_io_flags; /* mt flags (MT_IO_FLAGS_*) */ + unsigned long mt_io_flags; /* mt flags (MT_IO_FLAGS_RUNNING) + * first 8 bits are reserved for keeping the slot + * states, this is fine because we only support up + * to 250 slots (MT_MAX_MAXCONTACT) + */ __u8 inputmode_value; /* InputMode HID feature value */ __u8 maxcontacts; bool is_buttonpad; /* is this device a button pad?
*/ @@ -986,6 +989,7 @@ static void mt_release_pending_palms(struct mt_device *td, for_each_set_bit(slotnum, app->pending_palm_slots, td->maxcontacts) { clear_bit(slotnum, app->pending_palm_slots); + clear_bit(slotnum, &td->mt_io_flags); input_mt_slot(input, slotnum); input_mt_report_slot_inactive(input); @@ -1019,12 +1023,6 @@ static void mt_sync_frame(struct mt_device *td, struct mt_application *app, app->left_button_state = 0; if (td->is_haptic_touchpad) hid_haptic_pressure_reset(td->haptic); - - if (test_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags)) - set_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags); - else - clear_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags); - clear_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags); } static int mt_compute_timestamp(struct mt_application *app, __s32 value) @@ -1202,7 +1200,9 @@ static int mt_process_slot(struct mt_device *td, struct input_dev *input, input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, major); input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, minor); - set_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags); + set_bit(slotnum, &td->mt_io_flags); + } else { + clear_bit(slotnum, &td->mt_io_flags); } return 0; @@ -1337,7 +1337,7 @@ static void mt_touch_report(struct hid_device *hid, * defect. */ if (app->quirks & MT_QUIRK_STICKY_FINGERS) { - if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags)) + if (td->mt_io_flags & MT_IO_SLOTS_MASK) mod_timer(&td->release_timer, jiffies + msecs_to_jiffies(100)); else @@ -1814,6 +1814,7 @@ static void mt_release_contacts(struct hid_device *hid) for (i = 0; i < mt->num_slots; i++) { input_mt_slot(input_dev, i); input_mt_report_slot_inactive(input_dev); + clear_bit(i, &td->mt_io_flags); } input_mt_sync_frame(input_dev); input_sync(input_dev); @@ -1836,7 +1837,7 @@ static void mt_expired_timeout(struct timer_list *t) */ if (test_and_set_bit_lock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags)) return; - if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags)) + if (td->mt_io_flags & MT_IO_SLOTS_MASK) mt_release_contacts(hdev); clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); } -- cgit v1.2.3 From d9b3014a7f1425011909ad358dc0c8f187853a12 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 8 Oct 2025 16:06:59 +0200 Subject: selftests/hid: add tests for missing release on the Dell Synaptics Add a simple test for the corner case not currently covered by the sticky fingers quirk. Because it's a corner case test, we only test this on a couple of devices, not on all of them because the value of adding the same test over and over is rather moot. Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- .../testing/selftests/hid/tests/test_multitouch.py | 55 ++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tools/testing/selftests/hid/tests/test_multitouch.py b/tools/testing/selftests/hid/tests/test_multitouch.py index 5d2ffa3d5977..ece0ba8e7d34 100644 --- a/tools/testing/selftests/hid/tests/test_multitouch.py +++ b/tools/testing/selftests/hid/tests/test_multitouch.py @@ -1752,6 +1752,52 @@ class TestWin8TSConfidence(BaseTest.TestWin8Multitouch): assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + @pytest.mark.skip_if_uhdev( + lambda uhdev: "Confidence" not in uhdev.fields, + "Device not compatible, missing Confidence usage", + ) + def test_mt_confidence_bad_multi_release(self): + """Check for the sticky finger being properly detected. + + We first inject 3 fingers, then release only the second. 
+ After 100 ms, we should receive a generated event about the + 2 missing fingers being released. + """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + # send 3 touches + t0 = Touch(1, 50, 10) + t1 = Touch(2, 150, 100) + t2 = Touch(3, 250, 200) + r = uhdev.event([t0, t1, t2]) + events = uhdev.next_sync_events() + self.debug_reports(r, uhdev, events) + + # release the second + t1.tipswitch = False + r = uhdev.event([t1]) + events = uhdev.next_sync_events() + self.debug_reports(r, uhdev, events) + + # only the second is released + assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1 + assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1 + + # wait for the timer to kick in + time.sleep(0.2) + + events = uhdev.next_sync_events() + self.debug_reports([], uhdev, events) + + # now all 3 fingers are released + assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events + assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + + class TestElanXPS9360(BaseTest.TestWin8Multitouch): def create_device(self): return Digitizer( @@ -2086,3 +2132,12 @@ class Testsynaptics_06cb_ce08(BaseTest.TestPTP): input_info=(BusType.I2C, 0x06CB, 0xCE08), rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 01 09 02 a1 01 85 18 09 01 a1 00 05 09 19 01 29 03 46 00 00 15 00 25 01 75 01 95 03 81 02 95 05 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 06 00 ff 09 02 a1 01 85 20 09 01 a1 00 09 03 15 00 26 ff 00 35 00 46 ff 00 75 08 95 05 81 02 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 45 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 45 81 02 85 
0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0", ) + +class Testsynaptics_06cb_ce26(TestWin8TSConfidence): + def create_device(self): + return PTP( + "uhid test synaptics_06cb_ce26", + max_contacts=5, + input_info=(BusType.I2C, 0x06CB, 0xCE26), + rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 3d 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 3d 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0", + ) -- cgit v1.2.3 From 409b9499099b4ad14ca60b59c6edfebfaf74f907 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Tue, 14 Oct 2025 19:58:24 +0530 Subject: drm/xe/uapi: Add documentation for DRM_XE_GEM_CREATE_FLAG_SCANOUT Add documentation for drm_xe_gem_create structure flag DRM_XE_GEM_CREATE_FLAG_SCANOUT. Signed-off-by: Sanjay Yadav Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://lore.kernel.org/r/20251014142823.3701228-2-sanjay.kumar.yadav@intel.com --- include/uapi/drm/xe_drm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 40ff19f52a8d..2d7945cda739 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -771,7 +771,11 @@ struct drm_xe_device_query { * until the object is either bound to a virtual memory region via * VM_BIND or accessed by the CPU. As a result, no backing memory is * reserved at the time of GEM object creation. - * - %DRM_XE_GEM_CREATE_FLAG_SCANOUT + * - %DRM_XE_GEM_CREATE_FLAG_SCANOUT - Indicates that the GEM object is + * intended for scanout via the display engine. When set, kernel ensures + * that the allocation is placed in a memory region compatible with the + * display engine requirements. 
This may impose restrictions on tiling, + * alignment, and memory placement to guarantee proper display functionality. * - %DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM - When using VRAM as a * possible placement, ensure that the corresponding VRAM allocation * will always use the CPU accessible part of VRAM. This is important -- cgit v1.2.3 From bfdd74166a639930baaba27a8d729edaacd46907 Mon Sep 17 00:00:00 2001 From: Tim Hostetler Date: Tue, 14 Oct 2025 00:47:39 +0000 Subject: gve: Check valid ts bit on RX descriptor before hw timestamping The device returns a valid bit in the LSB of the low timestamp byte in the completion descriptor that the driver should check before setting the SKB's hardware timestamp. If the timestamp is not valid, do not hardware timestamp the SKB. Cc: stable@vger.kernel.org Fixes: b2c7aeb49056 ("gve: Implement ndo_hwtstamp_get/set for RX timestamping") Reviewed-by: Joshua Washington Signed-off-by: Tim Hostetler Signed-off-by: Harshitha Ramamurthy Reviewed-by: Simon Horman Reviewed-by: Willem de Bruijn Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251014004740.2775957-1-hramamurthy@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve.h | 2 ++ drivers/net/ethernet/google/gve/gve_desc_dqo.h | 3 ++- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 18 ++++++++++++------ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index bceaf9b05cb4..4cc6dcbfd367 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -100,6 +100,8 @@ */ #define GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD 96 +#define GVE_DQO_RX_HWTSTAMP_VALID 0x1 + /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */ struct gve_rx_desc_queue { struct gve_rx_desc *desc_ring; /* the descriptor ring */ diff --git a/drivers/net/ethernet/google/gve/gve_desc_dqo.h b/drivers/net/ethernet/google/gve/gve_desc_dqo.h index d17da841b5a0..f7786b03c744 100644 --- a/drivers/net/ethernet/google/gve/gve_desc_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_desc_dqo.h @@ -236,7 +236,8 @@ struct gve_rx_compl_desc_dqo { u8 status_error1; - __le16 reserved5; + u8 reserved5; + u8 ts_sub_nsecs_low; __le16 buf_id; /* Buffer ID which was sent on the buffer queue. */ union { diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 55393b784317..1aff3bbb8cfc 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -456,14 +456,20 @@ static void gve_rx_skb_hash(struct sk_buff *skb, * Note that this means if the time delta between packet reception and the last * clock read is greater than ~2 seconds, this will provide invalid results. 
*/ -static void gve_rx_skb_hwtstamp(struct gve_rx_ring *rx, u32 hwts) +static void gve_rx_skb_hwtstamp(struct gve_rx_ring *rx, + const struct gve_rx_compl_desc_dqo *desc) { u64 last_read = READ_ONCE(rx->gve->last_sync_nic_counter); struct sk_buff *skb = rx->ctx.skb_head; - u32 low = (u32)last_read; - s32 diff = hwts - low; - - skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(last_read + diff); + u32 ts, low; + s32 diff; + + if (desc->ts_sub_nsecs_low & GVE_DQO_RX_HWTSTAMP_VALID) { + ts = le32_to_cpu(desc->ts); + low = (u32)last_read; + diff = ts - low; + skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(last_read + diff); + } } static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx) @@ -944,7 +950,7 @@ static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi, gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype); if (rx->gve->ts_config.rx_filter == HWTSTAMP_FILTER_ALL) - gve_rx_skb_hwtstamp(rx, le32_to_cpu(desc->ts)); + gve_rx_skb_hwtstamp(rx, desc); /* RSC packets must set gso_size otherwise the TCP stack will complain * that packets are larger than MTU. -- cgit v1.2.3 From d451a0e88e9fa710df33f8dd5dc7ca63e22ef211 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 15 Oct 2025 17:05:04 +0200 Subject: smb: client: let smbd_destroy() wait for SMBDIRECT_SOCKET_DISCONNECTED We should wait for the rdma_cm to become SMBDIRECT_SOCKET_DISCONNECTED; it turns out that (at least when running some xfstests, e.g. cifs/001) we often trigger the case where wait_event_interruptible() returns with -ERESTARTSYS instead of waiting for SMBDIRECT_SOCKET_DISCONNECTED to be reached. Or we are already in SMBDIRECT_SOCKET_DISCONNECTING and never wait for SMBDIRECT_SOCKET_DISCONNECTED. Fixes: 050b8c374019 ("smbd: Make upper layer decide when to destroy the transport") Fixes: e8b3bfe9bc65 ("cifs: smbd: Don't destroy transport on RDMA disconnect") Fixes: b0aa92a229ab ("smb: client: make sure smbd_disconnect_rdma_work() doesn't run after smbd_destroy() took over") Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 77de85d7cdc3..49e2df3ad1f0 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1575,12 +1575,12 @@ void smbd_destroy(struct TCP_Server_Info *server) disable_work_sync(&sc->disconnect_work); log_rdma_event(INFO, "destroying rdma session\n"); - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) { + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) smbd_disconnect_rdma_work(&sc->disconnect_work); + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) { log_rdma_event(INFO, "wait for transport being disconnected\n"); - wait_event_interruptible( - sc->status_wait, - sc->status == SMBDIRECT_SOCKET_DISCONNECTED); + wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); + log_rdma_event(INFO, "waited for transport being disconnected\n"); } /* -- cgit v1.2.3 From 2aab1f993c8cb753ccb3d5b848cd758e2e87d965 Mon Sep 17 00:00:00 2001 From: Ankan Biswas Date: Wed, 15 Oct 2025 20:50:57 +0530 Subject: drm/gpuvm: Fix kernel-doc warning for drm_gpuvm_map_req.map The kernel-doc for struct drm_gpuvm_map_req.map was added as '@op_map' instead of '@map', leading to this warning during htmldocs build: WARNING: include/drm/drm_gpuvm.h:1083 struct member 'map' not described in
'drm_gpuvm_map_req' Fixes: 000a45dce7ad ("drm/gpuvm: Pass map arguments through a struct") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20250821133539.03aa298e@canb.auug.org.au/ Signed-off-by: Ankan Biswas Signed-off-by: Danilo Krummrich --- include/drm/drm_gpuvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 8890ded1d907..476990e761f8 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -1078,7 +1078,7 @@ struct drm_gpuva_ops { */ struct drm_gpuvm_map_req { /** - * @op_map: struct drm_gpuva_op_map + * @map: struct drm_gpuva_op_map */ struct drm_gpuva_op_map map; }; -- cgit v1.2.3 From fcd298fdc2a32f1d90cdf9a452c5c5fdc6e8d137 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 10 Oct 2025 12:03:20 -0400 Subject: ASoC: dt-bindings: Add compatible string fsl,imx-audio-tlv320 Add compatible string fsl,imx-audio-tlv320 to fix the below CHECK_DTBS warning: arch/arm/boot/dts/nxp/imx/imx6dl-gw5903.dtb: /sound: failed to match any schema with compatible: ['fsl,imx-audio-tlv320'] Signed-off-by: Frank Li Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20251010160321.2130093-1-Frank.Li@nxp.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/fsl-asoc-card.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/sound/fsl-asoc-card.yaml b/Documentation/devicetree/bindings/sound/fsl-asoc-card.yaml index 92aa47ec72c7..88eb20bb008f 100644 --- a/Documentation/devicetree/bindings/sound/fsl-asoc-card.yaml +++ b/Documentation/devicetree/bindings/sound/fsl-asoc-card.yaml @@ -79,6 +79,7 @@ properties: - fsl,imx-audio-nau8822 - fsl,imx-audio-sgtl5000 - fsl,imx-audio-si476x + - fsl,imx-audio-tlv320 - fsl,imx-audio-tlv320aic31xx - fsl,imx-audio-tlv320aic32x4 - fsl,imx-audio-wm8524 -- cgit v1.2.3 From 7a37291ed40a33a5f6c3d370fdde5ee0d8f7d0e4 Mon Sep 17 00:00:00 2001 From: Sharique Mohammad Date: Wed, 15 Oct 2025 15:42:15 +0200 Subject: ASoC: max98090/91: fixed max98091 ALSA widget powering up/down The widgets DMIC3_ENA and DMIC4_ENA must be defined in the DAPM supply widget, just like DMICL_ENA and DMICR_ENA. Whenever they are turned on or off, the required startup or shutdown sequences must be taken care of by the max98090_shdn_event.
Signed-off-by: Sharique Mohammad Link: https://patch.msgid.link/20251015134215.750001-1-sharq0406@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98090.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index 22177c1ce160..cb1508fc99f8 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -1234,9 +1234,11 @@ static const struct snd_soc_dapm_widget max98091_dapm_widgets[] = { SND_SOC_DAPM_INPUT("DMIC4"), SND_SOC_DAPM_SUPPLY("DMIC3_ENA", M98090_REG_DIGITAL_MIC_ENABLE, - M98090_DIGMIC3_SHIFT, 0, NULL, 0), + M98090_DIGMIC3_SHIFT, 0, max98090_shdn_event, + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("DMIC4_ENA", M98090_REG_DIGITAL_MIC_ENABLE, - M98090_DIGMIC4_SHIFT, 0, NULL, 0), + M98090_DIGMIC4_SHIFT, 0, max98090_shdn_event, + SND_SOC_DAPM_POST_PMU), }; static const struct snd_soc_dapm_route max98090_dapm_routes[] = { -- cgit v1.2.3 From 5726b68473f7153a7f6294185e5998b7e2a230a2 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Wed, 15 Oct 2025 15:55:30 +0800 Subject: ASoC: amd/sdw_utils: avoid NULL deref when devm_kasprintf() fails devm_kasprintf() may return NULL on memory allocation failure, but the debug message prints cpus->dai_name before checking it. Move the dev_dbg() call after the NULL check to prevent potential NULL pointer dereference. Fixes: cb8ea62e64020 ("ASoC: amd/sdw_utils: add sof based soundwire generic machine driver") Signed-off-by: Li Qiang Link: https://patch.msgid.link/20251015075530.146851-1-liqiang01@kylinos.cn Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-sdw-sof-mach.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/amd/acp/acp-sdw-sof-mach.c b/sound/soc/amd/acp/acp-sdw-sof-mach.c index 91d72d4bb9a2..d055582a3bf1 100644 --- a/sound/soc/amd/acp/acp-sdw-sof-mach.c +++ b/sound/soc/amd/acp/acp-sdw-sof-mach.c @@ -176,9 +176,9 @@ static int create_sdw_dailink(struct snd_soc_card *card, cpus->dai_name = devm_kasprintf(dev, GFP_KERNEL, "SDW%d Pin%d", link_num, cpu_pin_id); - dev_dbg(dev, "cpu->dai_name:%s\n", cpus->dai_name); if (!cpus->dai_name) return -ENOMEM; + dev_dbg(dev, "cpu->dai_name:%s\n", cpus->dai_name); codec_maps[j].cpu = 0; codec_maps[j].codec = j; -- cgit v1.2.3 From e6416c2dfe23c9a6fec881fda22ebb9ae486cfc5 Mon Sep 17 00:00:00 2001 From: Rong Zhang Date: Sat, 11 Oct 2025 00:59:58 +0800 Subject: x86/CPU/AMD: Prevent reset reasons from being retained across reboot The S5_RESET_STATUS register is parsed on boot and printed to kmsg. However, this could sometimes be misleading and lead to users wasting a lot of time on meaningless debugging for two reasons: * Some bits are never cleared by hardware. It's the software's responsibility to clear them as per the Processor Programming Reference (see [1]). * Some rare hardware-initiated platform resets do not update the register at all. In both cases, a previous reboot could leave its trace in the register, resulting in users seeing unrelated reboot reasons while debugging random reboots afterward. Write the read value back to the register in order to clear all reason bits since they are write-1-to-clear while the others must be preserved. [1]: https://bugzilla.kernel.org/show_bug.cgi?id=206537#attach_303991 [ bp: Massage commit message. 
] Fixes: ab8131028710 ("x86/CPU/AMD: Print the reason for the last reset") Signed-off-by: Rong Zhang Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Yazen Ghannam Cc: Link: https://lore.kernel.org/all/20250913144245.23237-1-i@rong.moe/ --- arch/x86/kernel/cpu/amd.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 5398db4dedb4..ccaa51ce63f6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1355,11 +1355,23 @@ static __init int print_s5_reset_status_mmio(void) return 0; value = ioread32(addr); - iounmap(addr); /* Value with "all bits set" is an error response and should be ignored. */ - if (value == U32_MAX) + if (value == U32_MAX) { + iounmap(addr); return 0; + } + + /* + * Clear all reason bits so they won't be retained if the next reset + * does not update the register. Besides, some bits are never cleared by + * hardware so it's software's responsibility to clear them. + * + * Writing the value back effectively clears all reason bits as they are + * write-1-to-clear. + */ + iowrite32(value, addr); + iounmap(addr); for (i = 0; i < ARRAY_SIZE(s5_reset_reason_txt); i++) { if (!(value & BIT(i))) -- cgit v1.2.3 From 18d6b1743eafeb3fb1e0ea5a2b7fd0a773d525a8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 15 Oct 2025 13:38:53 -0600 Subject: io_uring/rw: check for NULL io_br_sel when putting a buffer Both the read and write side use kiocb_done() to finish a request, and kiocb_done() will call io_put_kbuf() in case a provided buffer was used for the request. Provided buffers are not supported for writes, hence NULL is being passed in. This normally works fine, as io_put_kbuf() won't actually use the value unless REQ_F_BUFFER_RING or REQ_F_BUFFER_SELECTED is set in the request flags. But depending on compiler (or whether or not CONFIG_CC_OPTIMIZE_FOR_SIZE is set), that may be done even though the value is never used. This will then cause a NULL pointer dereference. Make it a bit more obvious and check for a NULL io_br_sel, and don't even bother calling io_put_kbuf() for that case. Fixes: 5fda51255439 ("io_uring/kbuf: switch to storing struct io_buffer_list locally") Reported-by: David Howells Tested-by: David Howells Signed-off-by: Jens Axboe --- io_uring/rw.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index a0f9d2021e3f..5b2241a5813c 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -655,13 +655,17 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret, if (ret >= 0 && req->flags & REQ_F_CUR_POS) req->file->f_pos = rw->kiocb.ki_pos; if (ret >= 0 && !(req->ctx->flags & IORING_SETUP_IOPOLL)) { + u32 cflags = 0; + __io_complete_rw_common(req, ret); /* * Safe to call io_end from here as we're inline * from the submission path. */ io_req_io_end(req); - io_req_set_res(req, final_ret, io_put_kbuf(req, ret, sel->buf_list)); + if (sel) + cflags = io_put_kbuf(req, ret, sel->buf_list); + io_req_set_res(req, final_ret, cflags); io_req_rw_cleanup(req, issue_flags); return IOU_COMPLETE; } else { -- cgit v1.2.3 From bc384963bc18e4f21cf8615b57cbbc9c5e0d309a Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 14 Oct 2025 19:06:50 +0200 Subject: MAINTAINERS: new entry for IPv6 IOAM Create a maintainer entry for IPv6 IOAM. Add myself as I authored most if not all of the IPv6 IOAM code in the kernel and actively participate in the related IETF groups. 
Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20251014170650.27679-1-justin.iurman@uliege.be Signed-off-by: Jakub Kicinski --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8a213950e37e..eba5e091a086 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18012,6 +18012,16 @@ X: net/rfkill/ X: net/wireless/ X: tools/testing/selftests/net/can/ +NETWORKING [IOAM] +M: Justin Iurman +S: Maintained +F: Documentation/networking/ioam6* +F: include/linux/ioam6* +F: include/net/ioam6* +F: include/uapi/linux/ioam6* +F: net/ipv6/ioam6* +F: tools/testing/selftests/net/ioam6* + NETWORKING [IPSEC] M: Steffen Klassert M: Herbert Xu -- cgit v1.2.3 From 0c3f2e62815a43628e748b1e4ad97a1c46cce703 Mon Sep 17 00:00:00 2001 From: Alexey Simakov Date: Tue, 14 Oct 2025 19:47:38 +0300 Subject: tg3: prevent use of uninitialized remote_adv and local_adv variables Some execution paths that jump to the fiber_setup_done label could leave the remote_adv and local_adv variables uninitialized and then use them. Initialize these variables at the point of definition to avoid this. Fixes: 85730a631f0c ("tg3: Add SGMII phy support for 5719/5718 serdes") Co-developed-by: Alexandr Sapozhnikov Signed-off-by: Alexandr Sapozhnikov Signed-off-by: Alexey Simakov Reviewed-by: Pavan Chebbi Link: https://patch.msgid.link/20251014164736.5890-1-bigalex934@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 7f00ec7fd7b9..d78cafdb2094 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -5803,7 +5803,7 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, bool force_reset) u32 current_speed = SPEED_UNKNOWN; u8 current_duplex = DUPLEX_UNKNOWN; bool current_link_up = false; - u32 local_adv, remote_adv, sgsr; + u32 local_adv = 0, remote_adv = 0, sgsr; if ((tg3_asic_rev(tp) == ASIC_REV_5719 || tg3_asic_rev(tp) == ASIC_REV_5720) && @@ -5944,9 +5944,6 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, bool force_reset) else current_duplex = DUPLEX_HALF; - local_adv = 0; - remote_adv = 0; - if (bmcr & BMCR_ANENABLE) { u32 common; -- cgit v1.2.3 From ce5af41e3234425a40974696682163edfd21128c Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:16:56 +0200 Subject: tls: trim encrypted message to match the plaintext on short splice During tls_sw_sendmsg_locked, we pre-allocate the encrypted message for the size we're expecting to send during the current iteration, but we may end up sending less, for example when splicing: if we're getting the data from small fragments of memory, we may fill up all the slots in the skmsg with less data than expected. In this case, we need to trim the encrypted message to only the length we actually need, to avoid pushing uninitialized bytes down the underlying TCP socket.
Fixes: fe1e81d4f73b ("tls/sw: Support MSG_SPLICE_PAGES") Reported-by: Jann Horn Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/66a0ae99c9efc15f88e9e56c1f58f902f442ce86.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index daac9fd4be7e..36ca3011ab87 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1112,8 +1112,11 @@ alloc_encrypted: goto send_end; tls_ctx->pending_open_record_frags = true; - if (sk_msg_full(msg_pl)) + if (sk_msg_full(msg_pl)) { full_record = true; + sk_msg_trim(sk, msg_en, + msg_pl->sg.size + prot->overhead_size); + } if (full_record || eor) goto copied; -- cgit v1.2.3 From b014a4e066c555185b7c367efacdc33f16695495 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:16:57 +0200 Subject: tls: wait for async encrypt in case of error during latter iterations of sendmsg If we hit an error during the main loop of tls_sw_sendmsg_locked (eg failed allocation), we jump to send_end and immediately return. Previous iterations may have queued async encryption requests that are still pending. We should wait for those before returning, as we could otherwise be reading from memory that userspace believes we're not using anymore, which would be a sort of use-after-free. This is similar to what tls_sw_recvmsg already does: failures during the main loop jump to the "wait for async" code, not straight to the unlock/return. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Reported-by: Jann Horn Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/c793efe9673b87f808d84fdefc0f732217030c52.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 36ca3011ab87..1478d515badc 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1054,7 +1054,7 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, if (ret == -EINPROGRESS) num_async++; else if (ret != -EAGAIN) - goto send_end; + goto end; } } @@ -1226,8 +1226,9 @@ trim_sgl: goto alloc_encrypted; } +send_end: if (!num_async) { - goto send_end; + goto end; } else if (num_zc || eor) { int err; @@ -1245,7 +1246,7 @@ trim_sgl: tls_tx_records(sk, msg->msg_flags); } -send_end: +end: ret = sk_stream_error(sk, msg->msg_flags, ret); return copied > 0 ? copied : ret; } -- cgit v1.2.3 From b6fe4c29bb51cf239ecf48eacf72b924565cb619 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:16:58 +0200 Subject: tls: always set record_type in tls_process_cmsg When userspace wants to send a non-DATA record (via the TLS_SET_RECORD_TYPE cmsg), we need to send any pending data from a previous MSG_MORE send() as a separate DATA record. If that DATA record is encrypted asynchronously, tls_handle_open_record will return -EINPROGRESS. This is currently treated as an error by tls_process_cmsg, and it will skip setting record_type to the correct value, but the caller (tls_sw_sendmsg_locked) handles that return value correctly and proceeds with sending the new message with an incorrect record_type (DATA instead of whatever was requested in the cmsg). Always set record_type before handling the open record. If tls_handle_open_record returns an error, record_type will be ignored. 
If it succeeds, whether with synchronous crypto (returning 0) or asynchronous (returning -EINPROGRESS), the caller will proceed correctly. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Reported-by: Jann Horn Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/0457252e578a10a94e40c72ba6288b3a64f31662.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index a3ccb3135e51..39a2ab47fe72 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -255,12 +255,9 @@ int tls_process_cmsg(struct sock *sk, struct msghdr *msg, if (msg->msg_flags & MSG_MORE) return -EINVAL; - rc = tls_handle_open_record(sk, msg->msg_flags); - if (rc) - return rc; - *record_type = *(unsigned char *)CMSG_DATA(cmsg); - rc = 0; + + rc = tls_handle_open_record(sk, msg->msg_flags); break; default: return -EINVAL; -- cgit v1.2.3 From b8a6ff84abbcbbc445463de58704686011edc8e1 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:16:59 +0200 Subject: tls: wait for pending async decryptions if tls_strp_msg_hold fails Async decryption calls tls_strp_msg_hold to create a clone of the input skb to hold references to the memory it uses. If we fail to allocate that clone, proceeding with async decryption can lead to various issues (UAF on the skb, writing into userspace memory after the recv() call has returned). In this case, wait for all pending decryption requests. Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Reported-by: Jann Horn Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/b9fe61dcc07dab15da9b35cf4c7d86382a98caf2.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 1478d515badc..e3d852091e7a 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1641,8 +1641,10 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, if (unlikely(darg->async)) { err = tls_strp_msg_hold(&ctx->strp, &ctx->async_hold); - if (err) - __skb_queue_tail(&ctx->async_hold, darg->skb); + if (err) { + err = tls_decrypt_async_wait(ctx); + darg->async = false; + } return err; } -- cgit v1.2.3 From 7f846c65ca11e63d2409868ff039081f80e42ae4 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:17:00 +0200 Subject: tls: don't rely on tx_work during send() With async crypto, we rely on tx_work to actually transmit records once encryption completes. But while send() is running, both the tx_lock and socket lock are held, so tx_work_handler cannot process the queue of encrypted records, and simply reschedules itself. During a large send(), this could last a long time, and use a lot of memory. Transmit any pending encrypted records before restarting the main loop of tls_sw_sendmsg_locked. 
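For context, a rough paraphrase of the worker involved (simplified, not the verbatim upstream handler): it can only push records once it takes tx_lock and the socket lock, both of which send() holds, so during a long send() it just keeps requeueing itself.

static void tx_work_handler(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct tx_work *tx_work = container_of(dwork, struct tx_work, work);
	struct sock *sk = tx_work->sk;
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);

	if (!mutex_trylock(&tls_ctx->tx_lock)) {
		/* send() still holds tx_lock: retry later */
		schedule_delayed_work(&ctx->tx_work.work, 1);
		return;
	}
	lock_sock(sk);
	tls_tx_records(sk, -1);	/* push completed records */
	release_sock(sk);
	mutex_unlock(&tls_ctx->tx_lock);
}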
Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Reported-by: Jann Horn Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/8396631478f70454b44afb98352237d33f48d34d.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index e3d852091e7a..d17135369980 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1152,6 +1152,13 @@ alloc_encrypted: } else if (ret != -EAGAIN) goto send_end; } + + /* Transmit if any encryptions have completed */ + if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + cancel_delayed_work(&ctx->tx_work.work); + tls_tx_records(sk, msg->msg_flags); + } + continue; rollback_iter: copied -= try_to_copy; @@ -1207,6 +1214,12 @@ copied: goto send_end; } } + + /* Transmit if any encryptions have completed */ + if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + cancel_delayed_work(&ctx->tx_work.work); + tls_tx_records(sk, msg->msg_flags); + } } continue; -- cgit v1.2.3 From f95fce1e953b8a7af3fbad84aaffe92804196e2d Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:17:01 +0200 Subject: selftests: net: tls: add tests for cmsg vs MSG_MORE We don't have a test to check that MSG_MORE won't let us merge records of different types across sendmsg calls. Add new tests that check: - MSG_MORE is only allowed for DATA records - a pending DATA record gets closed and pushed before a non-DATA record is processed Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/b34feeadefe8a997f068d5ed5617afd0072df3c0.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index e788b84551ca..77e4e30b46cc 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -564,6 +564,40 @@ TEST_F(tls, msg_more) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls, cmsg_msg_more) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + + /* we don't allow MSG_MORE with non-DATA records */ + EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, + MSG_MORE), -1); + EXPECT_EQ(errno, EINVAL); +} + +TEST_F(tls, msg_more_then_cmsg) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + char buf[10 * 2]; + int ret; + + EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1); + + ret = tls_send_cmsg(self->fd, record_type, test_str, send_len, 0); + EXPECT_EQ(ret, send_len); + + /* initial DATA record didn't get merged with the non-DATA record */ + EXPECT_EQ(recv(self->cfd, buf, send_len * 2, 0), send_len); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); +} + TEST_F(tls, msg_more_unsent) { char const *test_str = "test_read"; -- cgit v1.2.3 From 3667e9b442b95b021189db793b9156552f918e99 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:17:02 +0200 Subject: selftests: tls: add test for short splice due to full skmsg We don't have a test triggering a partial splice caused by a full skmsg. Add one, based on a program by Jann Horn. 
Use MAX_FRAGS=48 to make sure the skmsg will be full for any allowed value of CONFIG_MAX_SKB_FRAGS (17..45). Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/1d129a15f526ea3602f3a2b368aa0b6f7e0d35d5.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 77e4e30b46cc..5c6d8215021c 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -946,6 +946,37 @@ TEST_F(tls, peek_and_splice) EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } +#define MAX_FRAGS 48 +TEST_F(tls, splice_short) +{ + struct iovec sendchar_iov; + char read_buf[0x10000]; + char sendbuf[0x100]; + char sendchar = 'S'; + int pipefds[2]; + int i; + + sendchar_iov.iov_base = &sendchar; + sendchar_iov.iov_len = 1; + + memset(sendbuf, 's', sizeof(sendbuf)); + + ASSERT_GE(pipe2(pipefds, O_NONBLOCK), 0); + ASSERT_GE(fcntl(pipefds[0], F_SETPIPE_SZ, (MAX_FRAGS + 1) * 0x1000), 0); + + for (i = 0; i < MAX_FRAGS; i++) + ASSERT_GE(vmsplice(pipefds[1], &sendchar_iov, 1, 0), 0); + + ASSERT_EQ(write(pipefds[1], sendbuf, sizeof(sendbuf)), sizeof(sendbuf)); + + EXPECT_EQ(splice(pipefds[0], NULL, self->fd, NULL, MAX_FRAGS + 0x1000, 0), + MAX_FRAGS + sizeof(sendbuf)); + EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), 0), MAX_FRAGS + sizeof(sendbuf)); + EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EAGAIN); +} +#undef MAX_FRAGS + TEST_F(tls, recvmsg_single) { char const *test_str = "test_recvmsg_single"; -- cgit v1.2.3 From 1a8fed52f7be14e45785e8e54d0d0b50fc17dbd8 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 14 Oct 2025 02:17:25 -0700 Subject: netdevsim: set the carrier when the device goes up Bringing a linked netdevsim device down and then up causes communication failure because both interfaces lack carrier. Basically, an ifdown/ifup cycle on the interface breaks the link. Commit 3762ec05a9fbda ("netdevsim: add NAPI support") added support for NAPI, calling netif_carrier_off() in nsim_stop(). This patch re-enables the carrier symmetrically on nsim_open(), in case the device is linked and the peer is up.
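For context, the stop path already takes the carrier down on both ends, which is the asymmetry this patch closes; a simplified sketch of that side (details abridged, treat as illustrative rather than the exact driver code):

static int nsim_stop(struct net_device *dev)
{
	struct netdevsim *ns = netdev_priv(dev);
	struct netdevsim *peer;

	netif_carrier_off(dev);
	peer = rtnl_dereference(ns->peer);
	if (peer)
		netif_carrier_off(peer->netdev);
	/* NAPI teardown omitted */
	return 0;
}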
Signed-off-by: Breno Leitao Fixes: 3762ec05a9fbda ("netdevsim: add NAPI support") Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251014-netdevsim_fix-v2-1-53b40590dae1@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index ebc3833e95b4..fa1d97885caa 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -545,6 +545,7 @@ static void nsim_enable_napi(struct netdevsim *ns) static int nsim_open(struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); + struct netdevsim *peer; int err; netdev_assert_locked(dev); @@ -555,6 +556,12 @@ static int nsim_open(struct net_device *dev) nsim_enable_napi(ns); + peer = rtnl_dereference(ns->peer); + if (peer && netif_running(peer->netdev)) { + netif_carrier_on(dev); + netif_carrier_on(peer->netdev); + } + return 0; } -- cgit v1.2.3 From 8d93ff40d49d70e05c82a74beae31f883fe0eaf8 Mon Sep 17 00:00:00 2001 From: I Viswanath Date: Mon, 13 Oct 2025 23:46:48 +0530 Subject: net: usb: lan78xx: fix use of improperly initialized dev->chipid in lan78xx_reset dev->chipid is used in lan78xx_init_mac_address before it's initialized: lan78xx_reset() { lan78xx_init_mac_address() lan78xx_read_eeprom() lan78xx_read_raw_eeprom() <- dev->chipid is used here dev->chipid = ... <- dev->chipid is initialized correctly here } Reorder initialization so that dev->chipid is set before calling lan78xx_init_mac_address(). Fixes: a0db7d10b76e ("lan78xx: Add to handle mux control per chip id") Signed-off-by: I Viswanath Reviewed-by: Vadim Fedorenko Reviewed-by: Khalid Aziz Link: https://patch.msgid.link/20251013181648.35153-1-viswanathiyyappan@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 28195d9a8d6b..00397a807393 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3250,10 +3250,6 @@ static int lan78xx_reset(struct lan78xx_net *dev) } } while (buf & HW_CFG_LRST_); - ret = lan78xx_init_mac_address(dev); - if (ret < 0) - return ret; - /* save DEVID for later usage */ ret = lan78xx_read_reg(dev, ID_REV, &buf); if (ret < 0) @@ -3262,6 +3258,10 @@ static int lan78xx_reset(struct lan78xx_net *dev) dev->chipid = (buf & ID_REV_CHIP_ID_MASK_) >> 16; dev->chiprev = buf & ID_REV_CHIP_REV_MASK_; + ret = lan78xx_init_mac_address(dev); + if (ret < 0) + return ret; + /* Respond to the IN token with a NAK */ ret = lan78xx_read_reg(dev, USB_CFG0, &buf); if (ret < 0) -- cgit v1.2.3 From c2b77f42205ef485a647f62082c442c1cd69d3fc Mon Sep 17 00:00:00 2001 From: Shuhao Fu Date: Thu, 16 Oct 2025 02:52:55 +0000 Subject: smb: client: Fix refcount leak for cifs_sb_tlink Fix three refcount inconsistency issues related to `cifs_sb_tlink`. Comments for `cifs_sb_tlink` state that `cifs_put_tlink()` needs to be called after successful calls to `cifs_sb_tlink()`. Three calls fail to update refcount accordingly, leading to possible resource leaks. 
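A minimal sketch of the contract those comments describe (the function name is hypothetical; the point is that error paths must fall through to the put rather than return directly):

static int example_tlink_user(struct cifs_sb_info *cifs_sb)
{
	struct tcon_link *tlink;
	int rc = 0;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink))
		return PTR_ERR(tlink);

	/* use tlink_tcon(tlink); on failure, set rc and fall through */

	cifs_put_tlink(tlink);	/* must run on every exit path */
	return rc;
}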
Fixes: 8ceb98437946 ("CIFS: Move rename to ops struct") Fixes: 2f1afe25997f ("cifs: Use smb 2 - 3 and cifsacl mount options getacl functions") Fixes: 366ed846df60 ("cifs: Use smb 2 - 3 and cifsacl mount options setacl function") Cc: stable@vger.kernel.org Signed-off-by: Shuhao Fu Signed-off-by: Steve French --- fs/smb/client/inode.c | 6 ++++-- fs/smb/client/smb2ops.c | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 239dd84a336f..098a79b7a959 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2431,8 +2431,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, tcon = tlink_tcon(tlink); server = tcon->ses->server; - if (!server->ops->rename) - return -ENOSYS; + if (!server->ops->rename) { + rc = -ENOSYS; + goto do_rename_exit; + } /* try path-based rename first */ rc = server->ops->rename(xid, tcon, from_dentry, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 7c392cf5940b..95cd484cfbba 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3212,8 +3212,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { rc = -ENOMEM; - free_xid(xid); - return ERR_PTR(rc); + goto put_tlink; } oparms = (struct cifs_open_parms) { @@ -3245,6 +3244,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); } +put_tlink: cifs_put_tlink(tlink); free_xid(xid); @@ -3285,8 +3285,7 @@ set_smb2_acl(struct smb_ntsd *pnntsd, __u32 acllen, utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { rc = -ENOMEM; - free_xid(xid); - return rc; + goto put_tlink; } oparms = (struct cifs_open_parms) { @@ -3307,6 +3306,10 @@ set_smb2_acl(struct smb_ntsd *pnntsd, __u32 acllen, SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); } +put_tlink: cifs_put_tlink(tlink); free_xid(xid); return rc; -- cgit v1.2.3 From 6447b0e355562a1ff748c4a2ffb89aae7e84d2c9 Mon Sep 17 00:00:00 2001 From: Eugene Korenevsky Date: Mon, 13 Oct 2025 21:39:30 +0300 Subject: cifs: parse_dfs_referrals: prevent oob on malformed input A malicious SMB server can send an invalid reply to FSCTL_DFS_GET_REFERRALS: - a reply smaller than sizeof(struct get_dfs_referral_rsp) - a reply carrying fewer referrals than the NumberOfReferrals field in the header claims Processing such replies causes out-of-bounds (OOB) accesses. Return -EINVAL on such replies to prevent the OOB accesses.
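Distilled, the required validation checks both bounds before the referral array is dereferenced; a sketch of the logic (logging and cleanup trimmed from the actual fix below):

if (rsp_size < sizeof(*rsp))
	return -EINVAL;		/* truncated header */

num_of_nodes = le16_to_cpu(rsp->NumberOfReferrals);
if (num_of_nodes < 1)
	return -EINVAL;		/* nothing to parse */

if (sizeof(*rsp) + num_of_nodes * sizeof(REFERRAL3) > rsp_size)
	return -EINVAL;		/* header claims more referrals than fit */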
Signed-off-by: Eugene Korenevsky Cc: stable@vger.kernel.org Suggested-by: Nathan Chancellor Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/misc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index dda6dece802a..e10123d8cd7d 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -916,6 +916,14 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, char *data_end; struct dfs_referral_level_3 *ref; + if (rsp_size < sizeof(*rsp)) { + cifs_dbg(VFS | ONCE, + "%s: header is malformed (size is %u, must be %zu)\n", + __func__, rsp_size, sizeof(*rsp)); + rc = -EINVAL; + goto parse_DFS_referrals_exit; + } + *num_of_nodes = le16_to_cpu(rsp->NumberOfReferrals); if (*num_of_nodes < 1) { @@ -925,6 +933,15 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, goto parse_DFS_referrals_exit; } + if (sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3) > rsp_size) { + cifs_dbg(VFS | ONCE, + "%s: malformed buffer (size is %u, must be at least %zu)\n", + __func__, rsp_size, + sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3)); + rc = -EINVAL; + goto parse_DFS_referrals_exit; + } + ref = (struct dfs_referral_level_3 *) &(rsp->referrals); if (ref->VersionNumber != cpu_to_le16(3)) { cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n", -- cgit v1.2.3 From af5fea51411224cae61d54064a55fe22020bd2b7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:31 -0700 Subject: smb: client: Use SHA-512 library for SMB3.1.1 preauth hash Convert smb311_update_preauth_hash() to use the SHA-512 library instead of a "sha512" crypto_shash. This is simpler and faster. With the library there's no need to allocate memory, no need to handle errors, and the SHA-512 code is accessed directly without inefficient indirect calls and other unnecessary API overhead. Remove the call to smb311_crypto_shash_allocate() from smb311_update_preauth_hash(), since it appears to have been needed only to allocate the "sha512" crypto_shash. (It also had the side effect of allocating the "cmac(aes)" crypto_shash, but that's also done in generate_key() which is where the AES-CMAC key is initialized.) For now the "sha512" crypto_shash is still being allocated elsewhere. It will be removed in a later commit. 
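Conceptually, the preauth integrity hash is a running SHA-512 chained over the session-setup exchange: each message is folded into the previous value. A minimal sketch using the library API (the helper name is made up):

#include <crypto/sha2.h>

static void preauth_fold(u8 hash[SHA512_DIGEST_SIZE], const u8 *msg, size_t len)
{
	struct sha512_ctx ctx;

	sha512_init(&ctx);
	sha512_update(&ctx, hash, SHA512_DIGEST_SIZE);	/* previous value */
	sha512_update(&ctx, msg, len);			/* this message */
	sha512_final(&ctx, hash);			/* new running value */
}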
Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/Kconfig | 1 + fs/smb/client/smb2misc.c | 53 +++++++++++------------------------------------ fs/smb/client/smb2proto.h | 6 +++--- 3 files changed, 16 insertions(+), 44 deletions(-) diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig index a4c02199fef4..4ac79ff5649b 100644 --- a/fs/smb/client/Kconfig +++ b/fs/smb/client/Kconfig @@ -16,6 +16,7 @@ config CIFS select CRYPTO_ECB select CRYPTO_AES select CRYPTO_LIB_ARC4 + select CRYPTO_LIB_SHA512 select KEYS select DNS_RESOLVER select ASN1 diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 89d933b4a8bc..96bfe4c63ccf 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -7,6 +7,7 @@ * Pavel Shilovsky (pshilovsky@samba.org) 2012 * */ +#include #include #include "cifsglob.h" #include "cifsproto.h" @@ -888,13 +889,13 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve * @iov: array containing the SMB request we will send to the server * @nvec: number of array entries for the iov */ -int +void smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, struct kvec *iov, int nvec) { - int i, rc; + int i; struct smb2_hdr *hdr; - struct shash_desc *sha512 = NULL; + struct sha512_ctx sha_ctx; hdr = (struct smb2_hdr *)iov[0].iov_base; /* neg prot are always taken */ @@ -907,52 +908,22 @@ smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, * and we can test it. Preauth requires 3.1.1 for now. */ if (server->dialect != SMB311_PROT_ID) - return 0; + return; if (hdr->Command != SMB2_SESSION_SETUP) - return 0; + return; /* skip last sess setup response */ if ((hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR) && (hdr->Status == NT_STATUS_OK || (hdr->Status != cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED)))) - return 0; + return; ok: - rc = smb311_crypto_shash_allocate(server); - if (rc) - return rc; - - sha512 = server->secmech.sha512; - rc = crypto_shash_init(sha512); - if (rc) { - cifs_dbg(VFS, "%s: Could not init sha512 shash\n", __func__); - return rc; - } - - rc = crypto_shash_update(sha512, ses->preauth_sha_hash, - SMB2_PREAUTH_HASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not update sha512 shash\n", __func__); - return rc; - } - - for (i = 0; i < nvec; i++) { - rc = crypto_shash_update(sha512, iov[i].iov_base, iov[i].iov_len); - if (rc) { - cifs_dbg(VFS, "%s: Could not update sha512 shash\n", - __func__); - return rc; - } - } - - rc = crypto_shash_final(sha512, ses->preauth_sha_hash); - if (rc) { - cifs_dbg(VFS, "%s: Could not finalize sha512 shash\n", - __func__); - return rc; - } - - return 0; + sha512_init(&sha_ctx); + sha512_update(&sha_ctx, ses->preauth_sha_hash, SMB2_PREAUTH_HASH_SIZE); + for (i = 0; i < nvec; i++) + sha512_update(&sha_ctx, iov[i].iov_base, iov[i].iov_len); + sha512_final(&sha_ctx, ses->preauth_sha_hash); } diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index b3f1398c9f79..e7cda885c39f 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -296,9 +296,9 @@ extern void smb2_copy_fs_info_to_kstatfs( struct smb2_fs_full_size_info *pfs_inf, struct kstatfs *kst); extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server); -extern int smb311_update_preauth_hash(struct cifs_ses *ses, - struct TCP_Server_Info *server, - struct kvec *iov, int nvec); +extern void smb311_update_preauth_hash(struct cifs_ses *ses, + struct TCP_Server_Info 
*server, + struct kvec *iov, int nvec); extern int smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, const char *path, u32 desired_access, -- cgit v1.2.3 From 4b4c6fdb25de4edc0a34b1b93cccb439e00e1f35 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:32 -0700 Subject: smb: client: Use HMAC-SHA256 library for key generation Convert generate_key() to use the HMAC-SHA256 library instead of a "hmac(sha256)" crypto_shash. This is simpler and faster. With the library there's no need to allocate memory, no need to handle errors, and the HMAC-SHA256 code is accessed directly without inefficient indirect calls and other unnecessary API overhead. Also remove the unnecessary 'hashptr' variable. For now smb3_crypto_shash_allocate() still allocates a "hmac(sha256)" crypto_shash. It will be removed in a later commit. Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/Kconfig | 1 + fs/smb/client/smb2transport.c | 68 +++++++++---------------------------------- 2 files changed, 15 insertions(+), 54 deletions(-) diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig index 4ac79ff5649b..f0c1ff8544f6 100644 --- a/fs/smb/client/Kconfig +++ b/fs/smb/client/Kconfig @@ -16,6 +16,7 @@ config CIFS select CRYPTO_ECB select CRYPTO_AES select CRYPTO_LIB_ARC4 + select CRYPTO_LIB_SHA256 select CRYPTO_LIB_SHA512 select KEYS select DNS_RESOLVER diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 33f33013b392..bde96eace8c9 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "cifsglob.h" #include "cifsproto.h" #include "smb2proto.h" @@ -336,8 +337,8 @@ static int generate_key(struct cifs_ses *ses, struct kvec label, __u8 L256[4] = {0, 0, 1, 0}; int rc = 0; unsigned char prfhash[SMB2_HMACSHA256_SIZE]; - unsigned char *hashptr = prfhash; struct TCP_Server_Info *server = ses->server; + struct hmac_sha256_ctx hmac_ctx; memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE); memset(key, 0x0, key_size); @@ -345,67 +346,26 @@ static int generate_key(struct cifs_ses *ses, struct kvec label, rc = smb3_crypto_shash_allocate(server); if (rc) { cifs_server_dbg(VFS, "%s: crypto alloc failed\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_setkey(server->secmech.hmacsha256->tfm, - ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not set with session key\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_init(server->secmech.hmacsha256); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not init sign hmac\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_update(server->secmech.hmacsha256, i, 4); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with n\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_update(server->secmech.hmacsha256, label.iov_base, label.iov_len); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with label\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_update(server->secmech.hmacsha256, &zero, 1); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with zero\n", __func__); - goto smb3signkey_ret; + return rc; } - rc = crypto_shash_update(server->secmech.hmacsha256, context.iov_base, context.iov_len); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with context\n", __func__); - goto smb3signkey_ret; - } + 
hmac_sha256_init_usingrawkey(&hmac_ctx, ses->auth_key.response, + SMB2_NTLMV2_SESSKEY_SIZE); + hmac_sha256_update(&hmac_ctx, i, 4); + hmac_sha256_update(&hmac_ctx, label.iov_base, label.iov_len); + hmac_sha256_update(&hmac_ctx, &zero, 1); + hmac_sha256_update(&hmac_ctx, context.iov_base, context.iov_len); if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) || (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) { - rc = crypto_shash_update(server->secmech.hmacsha256, L256, 4); + hmac_sha256_update(&hmac_ctx, L256, 4); } else { - rc = crypto_shash_update(server->secmech.hmacsha256, L128, 4); - } - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with L\n", __func__); - goto smb3signkey_ret; + hmac_sha256_update(&hmac_ctx, L128, 4); } + hmac_sha256_final(&hmac_ctx, prfhash); - rc = crypto_shash_final(server->secmech.hmacsha256, hashptr); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__); - goto smb3signkey_ret; - } - - memcpy(key, hashptr, key_size); - -smb3signkey_ret: - return rc; + memcpy(key, prfhash, key_size); + return 0; } struct derivation { -- cgit v1.2.3 From e05b3115e75381369be84abe5d46565ce0fcedc8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:33 -0700 Subject: smb: client: Use HMAC-SHA256 library for SMB2 signature calculation Convert smb2_calc_signature() to use the HMAC-SHA256 library instead of a "hmac(sha256)" crypto_shash. This is simpler and faster. With the library there's no need to allocate memory, no need to handle errors, and the HMAC-SHA256 code is accessed directly without inefficient indirect calls and other unnecessary API overhead. To make this possible, make __cifs_calc_signature() support both the HMAC-SHA256 library and crypto_shash. (crypto_shash is still needed for HMAC-MD5 and AES-CMAC. A later commit will switch HMAC-MD5 from shash to the library. I'd like to eventually do the same for AES-CMAC, but it doesn't have a library API yet. So for now, shash is still needed.) Also remove the unnecessary 'sigptr' variable. For now smb3_crypto_shash_allocate() still allocates a "hmac(sha256)" crypto_shash. It will be removed in a later commit. 
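The library pattern these conversions rely on is a keyed init followed by incremental updates and a final; a minimal sketch (the helper name is made up):

#include <crypto/sha2.h>

static void hmac_two_parts(const u8 *key, size_t key_len,
			   const u8 *hdr, size_t hdr_len,
			   const u8 *body, size_t body_len,
			   u8 mac[SHA256_DIGEST_SIZE])
{
	struct hmac_sha256_ctx ctx;

	hmac_sha256_init_usingrawkey(&ctx, key, key_len);
	hmac_sha256_update(&ctx, hdr, hdr_len);
	hmac_sha256_update(&ctx, body, body_len);
	hmac_sha256_final(&ctx, mac);
}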
Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/cifsencrypt.c | 52 +++++++++++++++++++++++++++++------------- fs/smb/client/cifsproto.h | 9 +++++--- fs/smb/client/smb2transport.c | 53 +++++++++---------------------------------- 3 files changed, 53 insertions(+), 61 deletions(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 7b7c8c38fdd0..9522088a1cfb 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -24,14 +24,34 @@ #include #include #include +#include -static size_t cifs_shash_step(void *iter_base, size_t progress, size_t len, - void *priv, void *priv2) +static int cifs_sig_update(struct cifs_calc_sig_ctx *ctx, + const u8 *data, size_t len) { - struct shash_desc *shash = priv; + if (ctx->hmac) { + hmac_sha256_update(ctx->hmac, data, len); + return 0; + } + return crypto_shash_update(ctx->shash, data, len); +} + +static int cifs_sig_final(struct cifs_calc_sig_ctx *ctx, u8 *out) +{ + if (ctx->hmac) { + hmac_sha256_final(ctx->hmac, out); + return 0; + } + return crypto_shash_final(ctx->shash, out); +} + +static size_t cifs_sig_step(void *iter_base, size_t progress, size_t len, + void *priv, void *priv2) +{ + struct cifs_calc_sig_ctx *ctx = priv; int ret, *pret = priv2; - ret = crypto_shash_update(shash, iter_base, len); + ret = cifs_sig_update(ctx, iter_base, len); if (ret < 0) { *pret = ret; return len; @@ -42,21 +62,20 @@ static size_t cifs_shash_step(void *iter_base, size_t progress, size_t len, /* * Pass the data from an iterator into a hash. */ -static int cifs_shash_iter(const struct iov_iter *iter, size_t maxsize, - struct shash_desc *shash) +static int cifs_sig_iter(const struct iov_iter *iter, size_t maxsize, + struct cifs_calc_sig_ctx *ctx) { struct iov_iter tmp_iter = *iter; int err = -EIO; - if (iterate_and_advance_kernel(&tmp_iter, maxsize, shash, &err, - cifs_shash_step) != maxsize) + if (iterate_and_advance_kernel(&tmp_iter, maxsize, ctx, &err, + cifs_sig_step) != maxsize) return err; return 0; } -int __cifs_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, char *signature, - struct shash_desc *shash) +int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, + char *signature, struct cifs_calc_sig_ctx *ctx) { int i; ssize_t rc; @@ -82,8 +101,7 @@ int __cifs_calc_signature(struct smb_rqst *rqst, return -EIO; } - rc = crypto_shash_update(shash, - iov[i].iov_base, iov[i].iov_len); + rc = cifs_sig_update(ctx, iov[i].iov_base, iov[i].iov_len); if (rc) { cifs_dbg(VFS, "%s: Could not update with payload\n", __func__); @@ -91,11 +109,11 @@ int __cifs_calc_signature(struct smb_rqst *rqst, } } - rc = cifs_shash_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), shash); + rc = cifs_sig_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), ctx); if (rc < 0) return rc; - rc = crypto_shash_final(shash, signature); + rc = cifs_sig_final(ctx, signature); if (rc) cifs_dbg(VFS, "%s: Could not generate hash\n", __func__); @@ -134,7 +152,9 @@ static int cifs_calc_signature(struct smb_rqst *rqst, return rc; } - return __cifs_calc_signature(rqst, server, signature, server->secmech.md5); + return __cifs_calc_signature( + rqst, server, signature, + &(struct cifs_calc_sig_ctx){ .shash = server->secmech.md5 }); } /* must be called with server->srv_mutex held */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index e8fba98690ce..3bb74eea0e4f 100644 --- a/fs/smb/client/cifsproto.h +++ 
b/fs/smb/client/cifsproto.h @@ -632,9 +632,12 @@ int cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const unsigned char *path, char *pbuf, unsigned int *pbytes_written); -int __cifs_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, char *signature, - struct shash_desc *shash); +struct cifs_calc_sig_ctx { + struct hmac_sha256_ctx *hmac; + struct shash_desc *shash; +}; +int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, + char *signature, struct cifs_calc_sig_ctx *ctx); enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, enum securityEnum); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index bde96eace8c9..89258accc220 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -254,10 +254,9 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, { int rc; unsigned char smb2_signature[SMB2_HMACSHA256_SIZE]; - unsigned char *sigptr = smb2_signature; struct kvec *iov = rqst->rq_iov; struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base; - struct shash_desc *shash = NULL; + struct hmac_sha256_ctx hmac_ctx; struct smb_rqst drqst; __u64 sid = le64_to_cpu(shdr->SessionId); u8 key[SMB2_NTLMV2_SESSKEY_SIZE]; @@ -272,30 +271,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE); - if (allocate_crypto) { - rc = cifs_alloc_hash("hmac(sha256)", &shash); - if (rc) { - cifs_server_dbg(VFS, - "%s: sha256 alloc failed\n", __func__); - goto out; - } - } else { - shash = server->secmech.hmacsha256; - } - - rc = crypto_shash_setkey(shash->tfm, key, sizeof(key)); - if (rc) { - cifs_server_dbg(VFS, - "%s: Could not update with response\n", - __func__); - goto out; - } - - rc = crypto_shash_init(shash); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not init sha256", __func__); - goto out; - } + hmac_sha256_init_usingrawkey(&hmac_ctx, key, sizeof(key)); /* * For SMB2+, __cifs_calc_signature() expects to sign only the actual @@ -306,25 +282,17 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, */ drqst = *rqst; if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) { - rc = crypto_shash_update(shash, iov[0].iov_base, - iov[0].iov_len); - if (rc) { - cifs_server_dbg(VFS, - "%s: Could not update with payload\n", - __func__); - goto out; - } + hmac_sha256_update(&hmac_ctx, iov[0].iov_base, iov[0].iov_len); drqst.rq_iov++; drqst.rq_nvec--; } - rc = __cifs_calc_signature(&drqst, server, sigptr, shash); + rc = __cifs_calc_signature( + &drqst, server, smb2_signature, + &(struct cifs_calc_sig_ctx){ .hmac = &hmac_ctx }); if (!rc) - memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); + memcpy(shdr->Signature, smb2_signature, SMB2_SIGNATURE_SIZE); -out: - if (allocate_crypto) - cifs_free_hash(&shash); return rc; } @@ -542,7 +510,6 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, { int rc; unsigned char smb3_signature[SMB2_CMACAES_SIZE]; - unsigned char *sigptr = smb3_signature; struct kvec *iov = rqst->rq_iov; struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base; struct shash_desc *shash = NULL; @@ -603,9 +570,11 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, drqst.rq_nvec--; } - rc = __cifs_calc_signature(&drqst, server, sigptr, shash); + rc = __cifs_calc_signature( + &drqst, server, smb3_signature, + &(struct 
cifs_calc_sig_ctx){ .shash = shash }); if (!rc) - memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); + memcpy(shdr->Signature, smb3_signature, SMB2_SIGNATURE_SIZE); out: if (allocate_crypto) -- cgit v1.2.3 From ae04b1bb06f8c1738d01dc2f9b9391c4480544e4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:34 -0700 Subject: smb: client: Use MD5 library for M-F symlink hashing Convert parse_mf_symlink() and format_mf_symlink() to use the MD5 library instead of a "md5" crypto_shash. This is simpler and faster. With the library there's no need to allocate memory, no need to handle errors, and the MD5 code is accessed directly without inefficient indirect calls and other unnecessary API overhead. This also fixes an issue where these functions did not work on kernels booted in FIPS mode. The use of MD5 here is for data integrity rather than a security purpose, so it can use a non-FIPS-approved algorithm. Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/Kconfig | 1 + fs/smb/client/link.c | 31 +++---------------------------- 2 files changed, 4 insertions(+), 28 deletions(-) diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig index f0c1ff8544f6..f5a980bdfc93 100644 --- a/fs/smb/client/Kconfig +++ b/fs/smb/client/Kconfig @@ -16,6 +16,7 @@ config CIFS select CRYPTO_ECB select CRYPTO_AES select CRYPTO_LIB_ARC4 + select CRYPTO_LIB_MD5 select CRYPTO_LIB_SHA256 select CRYPTO_LIB_SHA512 select KEYS diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index fe80e711cd75..70f3c0c67eeb 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -5,6 +5,7 @@ * Author(s): Steve French (sfrench@us.ibm.com) * */ +#include #include #include #include @@ -36,23 +37,6 @@ #define CIFS_MF_SYMLINK_MD5_FORMAT "%16phN\n" #define CIFS_MF_SYMLINK_MD5_ARGS(md5_hash) md5_hash -static int -symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash) -{ - int rc; - struct shash_desc *md5 = NULL; - - rc = cifs_alloc_hash("md5", &md5); - if (rc) - return rc; - - rc = crypto_shash_digest(md5, link_str, link_len, md5_hash); - if (rc) - cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); - cifs_free_hash(&md5); - return rc; -} - static int parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, char **_link_str) @@ -77,11 +61,7 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) return -EINVAL; - rc = symlink_hash(link_len, link_str, md5_hash); - if (rc) { - cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); - return rc; - } + md5(link_str, link_len, md5_hash); scnprintf(md5_str2, sizeof(md5_str2), CIFS_MF_SYMLINK_MD5_FORMAT, @@ -103,7 +83,6 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, static int format_mf_symlink(u8 *buf, unsigned int buf_len, const char *link_str) { - int rc; unsigned int link_len; unsigned int ofs; u8 md5_hash[16]; @@ -116,11 +95,7 @@ format_mf_symlink(u8 *buf, unsigned int buf_len, const char *link_str) if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) return -ENAMETOOLONG; - rc = symlink_hash(link_len, link_str, md5_hash); - if (rc) { - cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); - return rc; - } + md5(link_str, link_len, md5_hash); scnprintf(buf, buf_len, CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT, -- cgit v1.2.3 From c04e55b257b42f5eb5a2c5e92ebd043fd75fe3ab Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 
Oct 2025 18:57:35 -0700 Subject: smb: client: Use MD5 library for SMB1 signature calculation Convert cifs_calc_signature() to use the MD5 library instead of a "md5" crypto_shash. This is simpler and faster. With the library there's no need to allocate memory, no need to handle errors, and the MD5 code is accessed directly without inefficient indirect calls and other unnecessary API overhead. To preserve the existing behavior of MD5 signature support being disabled when the kernel is booted with "fips=1", make cifs_calc_signature() check fips_enabled itself. Previously it relied on the error from cifs_alloc_hash("md5", &server->secmech.md5). Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/cifsencrypt.c | 34 +++++++++++++++++----------------- fs/smb/client/cifsproto.h | 1 + 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 9522088a1cfb..80215ba7a574 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -24,11 +24,16 @@ #include #include #include +#include #include static int cifs_sig_update(struct cifs_calc_sig_ctx *ctx, const u8 *data, size_t len) { + if (ctx->md5) { + md5_update(ctx->md5, data, len); + return 0; + } if (ctx->hmac) { hmac_sha256_update(ctx->hmac, data, len); return 0; @@ -38,6 +43,10 @@ static int cifs_sig_update(struct cifs_calc_sig_ctx *ctx, static int cifs_sig_final(struct cifs_calc_sig_ctx *ctx, u8 *out) { + if (ctx->md5) { + md5_final(ctx->md5, out); + return 0; + } if (ctx->hmac) { hmac_sha256_final(ctx->hmac, out); return 0; @@ -130,31 +139,22 @@ int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, static int cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, char *signature) { - int rc; + struct md5_ctx ctx; if (!rqst->rq_iov || !signature || !server) return -EINVAL; - - rc = cifs_alloc_hash("md5", &server->secmech.md5); - if (rc) - return -1; - - rc = crypto_shash_init(server->secmech.md5); - if (rc) { - cifs_dbg(VFS, "%s: Could not init md5\n", __func__); - return rc; + if (fips_enabled) { + cifs_dbg(VFS, + "MD5 signature support is disabled due to FIPS\n"); + return -EOPNOTSUPP; } - rc = crypto_shash_update(server->secmech.md5, - server->session_key.response, server->session_key.len); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with response\n", __func__); - return rc; - } + md5_init(&ctx); + md5_update(&ctx, server->session_key.response, server->session_key.len); return __cifs_calc_signature( rqst, server, signature, - &(struct cifs_calc_sig_ctx){ .shash = server->secmech.md5 }); + &(struct cifs_calc_sig_ctx){ .md5 = &ctx }); } /* must be called with server->srv_mutex held */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 3bb74eea0e4f..4976be2c47c1 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -633,6 +633,7 @@ int cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, const unsigned char *path, char *pbuf, unsigned int *pbytes_written); struct cifs_calc_sig_ctx { + struct md5_ctx *md5; struct hmac_sha256_ctx *hmac; struct shash_desc *shash; }; -- cgit v1.2.3 From 395a77b030a878a353465386e8618b5272a480ca Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:36 -0700 Subject: smb: client: Use HMAC-MD5 library for NTLMv2 For the HMAC-MD5 computations in NTLMv2, use the HMAC-MD5 library instead of a "hmac(md5)" crypto_shash. 
This is simpler and faster. With the library there's no need to allocate memory, no need to handle errors, and the HMAC-MD5 code is accessed directly without inefficient indirect calls and other unnecessary API overhead. To preserve the existing behavior of NTLMv2 support being disabled when the kernel is booted with "fips=1", make setup_ntlmv2_rsp() check fips_enabled itself. Previously it relied on the error from cifs_alloc_hash("hmac(md5)", &hmacmd5). Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/cifsencrypt.c | 114 +++++++++----------------------------------- 1 file changed, 22 insertions(+), 92 deletions(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 80215ba7a574..bbcf3b05c19a 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -425,11 +425,11 @@ static __le64 find_timestamp(struct cifs_ses *ses) } static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, - const struct nls_table *nls_cp, struct shash_desc *hmacmd5) + const struct nls_table *nls_cp) { - int rc = 0; int len; char nt_hash[CIFS_NTHASH_SIZE]; + struct hmac_md5_ctx hmac_ctx; __le16 *user; wchar_t *domain; wchar_t *server; @@ -437,17 +437,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, /* calculate md4 hash of password */ E_md4hash(ses->password, nt_hash, nls_cp); - rc = crypto_shash_setkey(hmacmd5->tfm, nt_hash, CIFS_NTHASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not set NT hash as a key, rc=%d\n", __func__, rc); - return rc; - } - - rc = crypto_shash_init(hmacmd5); - if (rc) { - cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc); - return rc; - } + hmac_md5_init_usingrawkey(&hmac_ctx, nt_hash, CIFS_NTHASH_SIZE); /* convert ses->user_name to unicode */ len = ses->user_name ? 
strlen(ses->user_name) : 0; @@ -462,12 +452,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, *(u16 *)user = 0; } - rc = crypto_shash_update(hmacmd5, (char *)user, 2 * len); + hmac_md5_update(&hmac_ctx, (const u8 *)user, 2 * len); kfree(user); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with user, rc=%d\n", __func__, rc); - return rc; - } /* convert ses->domainName to unicode and uppercase */ if (ses->domainName) { @@ -479,12 +465,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = cifs_strtoUTF16((__le16 *)domain, ses->domainName, len, nls_cp); - rc = crypto_shash_update(hmacmd5, (char *)domain, 2 * len); + hmac_md5_update(&hmac_ctx, (const u8 *)domain, 2 * len); kfree(domain); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with domain, rc=%d\n", __func__, rc); - return rc; - } } else { /* We use ses->ip_addr if no domain name available */ len = strlen(ses->ip_addr); @@ -494,25 +476,16 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, return -ENOMEM; len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len, nls_cp); - rc = crypto_shash_update(hmacmd5, (char *)server, 2 * len); + hmac_md5_update(&hmac_ctx, (const u8 *)server, 2 * len); kfree(server); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with server, rc=%d\n", __func__, rc); - return rc; - } } - rc = crypto_shash_final(hmacmd5, ntlmv2_hash); - if (rc) - cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc); - - return rc; + hmac_md5_final(&hmac_ctx, ntlmv2_hash); + return 0; } -static int -CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_desc *hmacmd5) +static void CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) { - int rc; struct ntlmv2_resp *ntlmv2 = (struct ntlmv2_resp *) (ses->auth_key.response + CIFS_SESS_KEY_SIZE); unsigned int hash_len; @@ -521,35 +494,15 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_ hash_len = ses->auth_key.len - (CIFS_SESS_KEY_SIZE + offsetof(struct ntlmv2_resp, challenge.key[0])); - rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc); - return rc; - } - - rc = crypto_shash_init(hmacmd5); - if (rc) { - cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc); - return rc; - } - if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) memcpy(ntlmv2->challenge.key, ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); else memcpy(ntlmv2->challenge.key, ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); - rc = crypto_shash_update(hmacmd5, ntlmv2->challenge.key, hash_len); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc); - return rc; - } - - /* Note that the MD5 digest over writes anon.challenge_key.key */ - rc = crypto_shash_final(hmacmd5, ntlmv2->ntlmv2_hash); - if (rc) - cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc); - - return rc; + /* Note that the HMAC-MD5 value overwrites ntlmv2->challenge.key */ + hmac_md5_usingrawkey(ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE, + ntlmv2->challenge.key, hash_len, + ntlmv2->ntlmv2_hash); } /* @@ -606,7 +559,6 @@ out: int setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) { - struct shash_desc *hmacmd5 = NULL; unsigned char *tiblob = NULL; /* target info blob */ struct ntlmv2_resp *ntlmv2; char ntlmv2_hash[16]; @@ -677,51 +629,29 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const 
struct nls_table *nls_cp) ntlmv2->client_chal = cc; ntlmv2->reserved2 = 0; - rc = cifs_alloc_hash("hmac(md5)", &hmacmd5); - if (rc) { - cifs_dbg(VFS, "Could not allocate HMAC-MD5, rc=%d\n", rc); + if (fips_enabled) { + cifs_dbg(VFS, "NTLMv2 support is disabled due to FIPS\n"); + rc = -EOPNOTSUPP; goto unlock; } /* calculate ntlmv2_hash */ - rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp, hmacmd5); + rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp); if (rc) { cifs_dbg(VFS, "Could not get NTLMv2 hash, rc=%d\n", rc); goto unlock; } /* calculate first part of the client response (CR1) */ - rc = CalcNTLMv2_response(ses, ntlmv2_hash, hmacmd5); - if (rc) { - cifs_dbg(VFS, "Could not calculate CR1, rc=%d\n", rc); - goto unlock; - } + CalcNTLMv2_response(ses, ntlmv2_hash); /* now calculate the session key for NTLMv2 */ - rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc); - goto unlock; - } - - rc = crypto_shash_init(hmacmd5); - if (rc) { - cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc); - goto unlock; - } - - rc = crypto_shash_update(hmacmd5, ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc); - goto unlock; - } - - rc = crypto_shash_final(hmacmd5, ses->auth_key.response); - if (rc) - cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc); + hmac_md5_usingrawkey(ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE, + ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE, + ses->auth_key.response); + rc = 0; unlock: cifs_server_unlock(ses->server); - cifs_free_hash(&hmacmd5); setup_ntlmv2_rsp_ret: kfree_sensitive(tiblob); -- cgit v1.2.3 From 2c09630d09c64b6b46e3d59a0031bc1807f742c4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:37 -0700 Subject: smb: client: Remove obsolete crypto_shash allocations Now that the SMB client accesses MD5, HMAC-MD5, HMAC-SHA256, and SHA-512 only via the library API and not via crypto_shash, allocating crypto_shash objects for these algorithms is no longer necessary. Remove all these allocations, their corresponding kconfig selections, and their corresponding module soft dependencies. 
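To illustrate the library API that replaces the crypto_shash usage (a sketch only; hmac_md5_update(), hmac_md5_final() and hmac_md5_usingrawkey() appear in the conversion above, while struct hmac_md5_ctx and hmac_md5_init_usingrawkey() are the assumed companion definitions from <crypto/md5.h>):

    #include <crypto/md5.h>

    static void example_hmac_md5(const u8 *key, size_t key_len,
                                 const u8 *data, size_t data_len,
                                 u8 out[MD5_DIGEST_SIZE])
    {
            struct hmac_md5_ctx ctx;

            /* Incremental form, as in calc_ntlmv2_hash() above */
            hmac_md5_init_usingrawkey(&ctx, key, key_len);
            hmac_md5_update(&ctx, data, data_len);
            hmac_md5_final(&ctx, out);
    }

Unlike the crypto_shash API, none of these calls can fail, which is why all of the rc plumbing above could be deleted. For the single-buffer case, hmac_md5_usingrawkey() does the same in one call, as in CalcNTLMv2_response().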
Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/Kconfig | 4 ---- fs/smb/client/cifsencrypt.c | 3 --- fs/smb/client/cifsfs.c | 4 ---- fs/smb/client/cifsglob.h | 3 --- fs/smb/client/smb2transport.c | 35 ++--------------------------------- 5 files changed, 2 insertions(+), 47 deletions(-) diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig index f5a980bdfc93..17bd368574e9 100644 --- a/fs/smb/client/Kconfig +++ b/fs/smb/client/Kconfig @@ -5,11 +5,7 @@ config CIFS select NLS select NLS_UCS2_UTILS select CRYPTO - select CRYPTO_MD5 - select CRYPTO_SHA256 - select CRYPTO_SHA512 select CRYPTO_CMAC - select CRYPTO_HMAC select CRYPTO_AEAD2 select CRYPTO_CCM select CRYPTO_GCM diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index bbcf3b05c19a..801824825ecf 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -693,9 +693,6 @@ void cifs_crypto_secmech_release(struct TCP_Server_Info *server) { cifs_free_hash(&server->secmech.aes_cmac); - cifs_free_hash(&server->secmech.hmacsha256); - cifs_free_hash(&server->secmech.md5); - cifs_free_hash(&server->secmech.sha512); if (server->secmech.enc) { crypto_free_aead(server->secmech.enc); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 05b1fa76e8cc..4f959f1e08d2 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -2139,13 +2139,9 @@ MODULE_DESCRIPTION "also older servers complying with the SNIA CIFS Specification)"); MODULE_VERSION(CIFS_VERSION); MODULE_SOFTDEP("ecb"); -MODULE_SOFTDEP("hmac"); -MODULE_SOFTDEP("md5"); MODULE_SOFTDEP("nls"); MODULE_SOFTDEP("aes"); MODULE_SOFTDEP("cmac"); -MODULE_SOFTDEP("sha256"); -MODULE_SOFTDEP("sha512"); MODULE_SOFTDEP("aead2"); MODULE_SOFTDEP("ccm"); MODULE_SOFTDEP("gcm"); diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index c5034cf9ac9e..16a00a61fd2c 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -222,9 +222,6 @@ struct session_key { /* crypto hashing related structure/fields, not specific to a sec mech */ struct cifs_secmech { - struct shash_desc *md5; /* md5 hash function, for CIFS/SMB1 signatures */ - struct shash_desc *hmacsha256; /* hmac-sha256 hash function, for SMB2 signatures */ - struct shash_desc *sha512; /* sha512 hash function, for SMB3.1.1 preauth hash */ struct shash_desc *aes_cmac; /* block-cipher based MAC function, for SMB3 signatures */ struct crypto_aead *enc; /* smb3 encryption AEAD TFM (AES-CCM and AES-GCM) */ diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 89258accc220..cd689bc27bfd 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -31,49 +31,18 @@ static int smb3_crypto_shash_allocate(struct TCP_Server_Info *server) { struct cifs_secmech *p = &server->secmech; - int rc; - - rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256); - if (rc) - goto err; - - rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac); - if (rc) - goto err; - return 0; -err: - cifs_free_hash(&p->hmacsha256); - return rc; + return cifs_alloc_hash("cmac(aes)", &p->aes_cmac); } int smb311_crypto_shash_allocate(struct TCP_Server_Info *server) { struct cifs_secmech *p = &server->secmech; - int rc = 0; - rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256); - if (rc) - return rc; - - rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac); - if (rc) - goto err; - - rc = cifs_alloc_hash("sha512", &p->sha512); - if (rc) - goto err; - - return 0; - -err: - 
cifs_free_hash(&p->aes_cmac); - cifs_free_hash(&p->hmacsha256); - return rc; + return cifs_alloc_hash("cmac(aes)", &p->aes_cmac); } - static int smb3_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) { -- cgit v1.2.3 From 3c15a6df61bab034b087f00181408b1537a535bb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:38 -0700 Subject: smb: client: Consolidate cmac(aes) shash allocation Now that smb3_crypto_shash_allocate() and smb311_crypto_shash_allocate() are identical and only allocate "cmac(aes)", delete the latter and replace the call to it with the former. Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/sess.c | 2 +- fs/smb/client/smb2proto.h | 2 +- fs/smb/client/smb2transport.c | 10 +--------- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 0a8c2fcc9ded..ef3b498b0a02 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -584,7 +584,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, * to sign packets before we generate the channel signing key * (we sign with the session key) */ - rc = smb311_crypto_shash_allocate(chan->server); + rc = smb3_crypto_shash_allocate(chan->server); if (rc) { cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__); mutex_unlock(&ses->session_mutex); diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index e7cda885c39f..6eb86d134abc 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -295,7 +295,7 @@ extern int smb2_validate_and_copy_iov(unsigned int offset, extern void smb2_copy_fs_info_to_kstatfs( struct smb2_fs_full_size_info *pfs_inf, struct kstatfs *kst); -extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server); +extern int smb3_crypto_shash_allocate(struct TCP_Server_Info *server); extern void smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, struct kvec *iov, int nvec); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index cd689bc27bfd..ad6068e17a2a 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -27,16 +27,8 @@ #include "../common/smb2status.h" #include "smb2glob.h" -static int -smb3_crypto_shash_allocate(struct TCP_Server_Info *server) -{ - struct cifs_secmech *p = &server->secmech; - - return cifs_alloc_hash("cmac(aes)", &p->aes_cmac); -} - int -smb311_crypto_shash_allocate(struct TCP_Server_Info *server) +smb3_crypto_shash_allocate(struct TCP_Server_Info *server) { struct cifs_secmech *p = &server->secmech; -- cgit v1.2.3 From 7987b93e3a11a7a95ddf2b21563d3286661b999c Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 10 Oct 2025 12:41:48 +0200 Subject: drm/xe/svm: Ensure data will be migrated to system if indicated by madvise. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the location madvise() is set to DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM, the drm_pagemap in the SVM gpu fault handler will be set to NULL. However there is nothing that explicitly migrates the data to system if it is already present in device memory. In that case, set the device memory owner to NULL to ensure data gets properly migrated to system on page-fault. 
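The mechanism this relies on is not visible in the diff itself: hmm_range_fault() only hands a device-private page back to the caller when the page's pagemap owner matches the owner the caller asked for, and a NULL owner matches nothing, so the page is faulted and thereby migrated to system memory. A rough sketch of that check (paraphrased from mm/hmm.c, not code from this patch):

    /* Per device-private swap entry, conceptually: */
    if (page->pgmap->owner == range->dev_private_owner) {
            /* Matching owner: return the device PFN as-is. */
    } else {
            /* No match (e.g. owner == NULL): fault, migrating to system. */
    }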
v2: - Remove redundant dpagemap assignment (Himal Prasad Ghimiray) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost #v1 Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20251010104149.72783-2-thomas.hellstrom@linux.intel.com Fixes: 10aa5c806030 ("drm/gpusvm, drm/xe: Fix userptr to not allow device private pages") (cherry picked from commit 2cfcea7a745794f9b8e265a309717ca6ba335fc4) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index b268ee0d2271..da2a412f80c0 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -1034,6 +1034,9 @@ retry: if (err) return err; + dpagemap = xe_vma_resolve_pagemap(vma, tile); + if (!dpagemap && !ctx.devmem_only) + ctx.device_private_page_owner = NULL; range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx); if (IS_ERR(range)) @@ -1054,7 +1057,6 @@ retry: range_debug(range, "PAGE FAULT"); - dpagemap = xe_vma_resolve_pagemap(vma, tile); if (--migrate_try_count >= 0 && xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) { ktime_t migrate_start = xe_svm_stats_ktime_get(); -- cgit v1.2.3 From 6d36f65ba551d28710c3e1aaceecacf19df0cd8f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 08:30:15 -0700 Subject: drm/xe/kunit: Fix kerneldoc for parameterized tests Kunit's generate_params() was recently updated to take an additional test context parameter. Xe's IP and platform parameter generators were updated accordingly at the same time, but the new parameter was not added to the functions' kerneldoc, resulting in the following warnings: Warning: drivers/gpu/drm/xe/tests/xe_pci.c:78 function parameter 'test' not described in 'xe_pci_fake_data_gen_params' Warning: drivers/gpu/drm/xe/tests/xe_pci.c:254 function parameter 'test' not described in 'xe_pci_graphics_ip_gen_param' Warning: drivers/gpu/drm/xe/tests/xe_pci.c:278 function parameter 'test' not described in 'xe_pci_media_ip_gen_param' Warning: drivers/gpu/drm/xe/tests/xe_pci.c:302 function parameter 'test' not described in 'xe_pci_id_gen_param' Warning: drivers/gpu/drm/xe/tests/xe_pci.c:390 function parameter 'test' not described in 'xe_pci_live_device_gen_param' 5 warnings as errors Document the new parameter to eliminate the warnings and make CI happy. 
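For reference, the prototype shape implied by commit b9a214b5f6aa (inferred from the warnings above, not shown in this diff) is:

    /* Returns the next parameter, or NULL to end the iteration. */
    static const void *xe_pci_id_gen_param(struct kunit *test,
                                           const void *prev, char *desc)
    {
            /* placeholder body; the real generator walks pciidlist */
            return NULL;
    }

so each generator's kerneldoc now needs a @test line alongside @prev and @desc, which is exactly what the hunks below add.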
Fixes: b9a214b5f6aa ("kunit: Pass parameterized test context to generate_params()") Reviewed-by: Shuicheng Lin Link: https://lore.kernel.org/r/20251013153014.2362879-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper (cherry picked from commit 89e347f8a70165d1e8d88a93d875da7742c902ce) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/tests/xe_pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 69e2840c7ef0..663a79ec960d 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -66,6 +66,7 @@ KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc); /** * xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -242,6 +243,7 @@ KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc); /** * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -266,6 +268,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param); /** * xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -290,6 +293,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); /** * xe_pci_id_gen_param - Generate struct pci_device_id parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -376,6 +380,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init); /** * xe_pci_live_device_gen_param - Helper to iterate Xe devices as KUnit parameters + * @test: test context object * @prev: the previously returned value, or NULL for the first iteration * @desc: the buffer for a parameter name * -- cgit v1.2.3 From 6a91af25cdbce2086d85cc4994cf791bda3a2c90 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 10 Oct 2025 17:20:21 +0100 Subject: drm/xe/migrate: don't misalign current bytes If current bytes exceeds the max copy size, ensure the clamped size still accounts for the XE_CACHELINE_BYTES alignment, otherwise we trigger the assert in xe_migrate_vram with the size now being out of alignment. 
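To make the misalignment concrete (assuming XE_CACHELINE_BYTES is 64; pitch = 4 is from the code below):

    S16_MAX * pitch        = 32767 * 4 = 131068
    131068 % 64            = 60        (clamped size loses alignment)
    round_down(131068, 64) = 131008    (clamped size stays aligned)

so a sufficiently large, non-page-aligned transfer could previously trip the alignment assert.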
Fixes: 8c2d61e0e916 ("drm/xe/migrate: don't overflow max copy size") Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6212 Signed-off-by: Matthew Auld Cc: Stuart Summers Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251010162020.190962-2-matthew.auld@intel.com (cherry picked from commit 641bcf8731d21b56760e3646a39a65f471e9efd1) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_migrate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 569869a2b339..a36ce7dce8cc 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -2113,7 +2113,9 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, if (current_bytes & ~PAGE_MASK) { int pitch = 4; - current_bytes = min_t(int, current_bytes, S16_MAX * pitch); + current_bytes = min_t(int, current_bytes, + round_down(S16_MAX * pitch, + XE_CACHELINE_BYTES)); } __fence = xe_migrate_vram(m, current_bytes, -- cgit v1.2.3 From 225bc03d85427e7e3821d6f99f4f2d4a09350dda Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 10 Oct 2025 16:24:58 +0100 Subject: drm/xe/evict: drop bogus assert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This assert can trigger here with non pin_map users that select LATE_RESTORE, since the vmap is allowed to be NULL given that save/restore can now use the blitter instead. The check here doesn't seem to have much value anymore given that we no longer move pinned memory, so any existing vmap is left well alone, and doesn't need to be recreated upon restore, so just drop the assert here. Fixes: 86f69c26113c ("drm/xe: use backup object for pinned save/restore") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6213 Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Thomas Hellström Link: https://lore.kernel.org/r/20251010152457.177884-2-matthew.auld@intel.com (cherry picked from commit a10b4a69c7f8f596d2c5218fbe84430734fab3b2) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo_evict.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index d5dbc51e8612..bc5b4c5fab81 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -182,7 +182,6 @@ int xe_bo_evict_all(struct xe_device *xe) static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) { - struct xe_device *xe = xe_bo_device(bo); int ret; ret = xe_bo_restore_pinned(bo); @@ -201,13 +200,6 @@ static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) } } - /* - * We expect validate to trigger a move VRAM and our move code - * should setup the iosys map. - */ - xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) || - !iosys_map_is_null(&bo->vmap)); - return 0; } -- cgit v1.2.3 From 9af61fc91486c7ba93cf1ec3bd381978cac8308c Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Wed, 15 Oct 2025 22:47:10 +0300 Subject: ALSA: usb-audio: add volume quirks for MS LifeChat LX-3000 ID 045e:070f Microsoft Corp. LifeChat LX-3000 Headset has muted minimum Speaker Playback Volume, and 4 amixer steps were observed to produce 1 actual volume step. Apply min_mute quirk and correct res=48 -> 4*48. Tested with the device. 
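Spelling out the arithmetic: cval->res is the step size, in raw volume units, that one mixer step represents. If four mixer steps were needed per audible change, the advertised step size is four times too small, so the fix multiplies it: 4 * 48 = 192, making each exposed step one real step without changing the overall range.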
Signed-off-by: Pauli Virtanen Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 8 ++++++++ sound/usb/quirks.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index ae412e651faf..6f00e0d52382 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1147,6 +1147,14 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, } break; + case USB_ID(0x045e, 0x070f): /* MS LifeChat LX-3000 Headset */ + if (!strcmp(kctl->id.name, "Speaker Playback Volume")) { + usb_audio_info(chip, + "set volume quirk for MS LifeChat LX-3000\n"); + cval->res = 192; + } + break; + case USB_ID(0x0471, 0x0101): case USB_ID(0x0471, 0x0104): case USB_ID(0x0471, 0x0105): diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 43793a5c3f51..dac469a8d07a 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2153,6 +2153,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { DEVICE_FLG(0x045e, 0x083c, /* MS USB Link headset */ QUIRK_FLAG_GET_SAMPLE_RATE | QUIRK_FLAG_CTL_MSG_DELAY | QUIRK_FLAG_DISABLE_AUTOSUSPEND), + DEVICE_FLG(0x045e, 0x070f, /* MS LifeChat LX-3000 Headset */ + QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE), DEVICE_FLG(0x046d, 0x0807, /* Logitech Webcam C500 */ QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_MIC_RES_384), DEVICE_FLG(0x046d, 0x0808, /* Logitech Webcam C600 */ -- cgit v1.2.3 From c6fceaf166479c05f7d3158ef08e78ae3e3dfa23 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Thu, 16 Oct 2025 00:11:30 +0300 Subject: ALSA: usb-audio: fix vendor quirk for Logitech H390 Vendor quirk QUIRK_FLAG_CTL_MSG_DELAY_1M was inadvertently missing when adding quirk for Logitech H390. Add it back. Fixes: 2b929b6eec0c ("ALSA: usb-audio: add mixer_playback_min_mute quirk for Logitech H390") Signed-off-by: Pauli Virtanen Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index dac469a8d07a..71638e6dfb20 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2183,6 +2183,7 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { DEVICE_FLG(0x046d, 0x09a4, /* Logitech QuickCam E 3500 */ QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x046d, 0x0a8f, /* Logitech H390 headset */ + QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE), DEVICE_FLG(0x0499, 0x1506, /* Yamaha THR5 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), -- cgit v1.2.3 From 5801e65206b065b0b2af032f7f1eef222aa2fd83 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 15 Oct 2025 09:40:15 +0100 Subject: drm/sched: Fix potential double free in drm_sched_job_add_resv_dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When adding dependencies with drm_sched_job_add_dependency(), that function consumes the fence reference both on success and failure, so in the latter case the dma_fence_put() on the error path (xarray failed to expand) is a double free. Interestingly this bug appears to have been present ever since commit ebd5f74255b9 ("drm/sched: Add dependency tracking"), since the code back then looked like this: drm_sched_job_add_implicit_dependencies(): ... for (i = 0; i < fence_count; i++) { ret = drm_sched_job_add_dependency(job, fences[i]); if (ret) break; } for (; i < fence_count; i++) dma_fence_put(fences[i]); Which means for the failing 'i' the dma_fence_put was already a double free. Possibly there were no users at that time, or the test cases were insufficient to hit it. 
The bug was then only noticed and fixed after commit 9c2ba265352a ("drm/scheduler: use new iterator in drm_sched_job_add_implicit_dependencies v2") landed, with its fixup of commit 4eaf02d6076c ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies"). At that point it was a slightly different flavour of a double free, which commit 963d0b356935 ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies harder") noticed and attempted to fix. But it only moved the double free from happening inside the drm_sched_job_add_dependency(), when releasing the reference not yet obtained, to the caller, when releasing the reference already released by the former in the failure case. As such it is not easy to identify the right target for the Fixes tag, so let's keep it simple and just continue the chain. While fixing, we also improve the comment and explain the reason for taking the reference and not dropping it. Signed-off-by: Tvrtko Ursulin Fixes: 963d0b356935 ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies harder") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/dri-devel/aNFbXq8OeYl3QSdm@stanley.mountain/ Cc: Christian König Cc: Rob Clark Cc: Daniel Vetter Cc: Matthew Brost Cc: Danilo Krummrich Cc: Philipp Stanner Cc: Christian König Cc: dri-devel@lists.freedesktop.org Cc: stable@vger.kernel.org # v5.16+ Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20251015084015.6273-1-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/scheduler/sched_main.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 46119aacb809..c39f0245e3a9 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -965,13 +965,14 @@ int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job, dma_resv_assert_held(resv); dma_resv_for_each_fence(&cursor, resv, usage, fence) { - /* Make sure to grab an additional ref on the added fence */ - dma_fence_get(fence); - ret = drm_sched_job_add_dependency(job, fence); - if (ret) { - dma_fence_put(fence); + /* + * As drm_sched_job_add_dependency always consumes the fence + * reference (even when it fails), and dma_resv_for_each_fence + * is not obtaining one, we need to grab one before calling. + */ + ret = drm_sched_job_add_dependency(job, dma_fence_get(fence)); + if (ret) return ret; - } } return 0; } -- cgit v1.2.3 From 86f54f9b6c17d6567c69e3a6fed52fdf5d7dbe93 Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Wed, 15 Oct 2025 22:16:42 +0800 Subject: slab: reset slab->obj_ext when freeing and it is OBJEXTS_ALLOC_FAIL If obj_exts allocation failed, slab->obj_exts is set to OBJEXTS_ALLOC_FAIL, but we do not clear it when freeing the slab. Since OBJEXTS_ALLOC_FAIL and MEMCG_DATA_OBJEXTS currently share the same bit position, during the release of the associated folio, a VM_BUG_ON_FOLIO() check in folio_memcg_kmem() is triggered because the OBJEXTS_ALLOC_FAIL flag was not cleared, causing it to be interpreted as a kmem folio (non-slab) with the MEMCG_DATA_OBJEXTS flag set, which is invalid because MEMCG_DATA_OBJEXTS is supposed to be set only on slabs. Another problem that predates sharing the OBJEXTS_ALLOC_FAIL and MEMCG_DATA_OBJEXTS bits is that on configurations with is_check_pages_enabled(), the non-cleared bit in page->memcg_data will trigger a free_page_is_bad() failure: "page still charged to cgroup". When freeing a slab, we clear slab->obj_exts if the obj_ext array has been successfully allocated.
So let's clear it also when the allocation has failed. Fixes: 09c46563ff6d ("codetag: debug: introduce OBJEXTS_ALLOC_FAIL to mark failed slab_ext allocations") Fixes: 7612833192d5 ("slab: Reuse first bit for OBJEXTS_ALLOC_FAIL") Link: https://lore.kernel.org/all/20251015141642.700170-1-hao.ge@linux.dev/ Cc: Signed-off-by: Hao Ge Reviewed-by: Suren Baghdasaryan Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- mm/slub.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 13ae4491136a..a8fcc7e6f25a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2170,8 +2170,15 @@ static inline void free_slab_obj_exts(struct slab *slab) struct slabobj_ext *obj_exts; obj_exts = slab_obj_exts(slab); - if (!obj_exts) + if (!obj_exts) { + /* + * If obj_exts allocation failed, slab->obj_exts is set to + * OBJEXTS_ALLOC_FAIL. In this case, we end up here and should + * clear the flag. + */ + slab->obj_exts = 0; return; + } /* * obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its -- cgit v1.2.3 From 6de1dec1c166c7f7324ce52ccfdf43e2fa743b19 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Oct 2025 05:27:15 +0000 Subject: udp: do not use skb_release_head_state() before skb_attempt_defer_free() Michal reported and bisected an issue after recent adoption of skb_attempt_defer_free() in UDP. The issue here is that skb_release_head_state() is called twice per skb, one time from skb_consume_udp(), then a second time from skb_defer_free_flush() and napi_consume_skb(). As Sabrina suggested, remove skb_release_head_state() call from skb_consume_udp(). Add a DEBUG_NET_WARN_ON_ONCE(skb_nfct(skb)) in skb_attempt_defer_free() Many thanks to Michal, Sabrina, Paolo and Florian for their help. Fixes: 6471658dc66c ("udp: use skb_attempt_defer_free()") Reported-and-bisected-by: Michal Kubecek Closes: https://lore.kernel.org/netdev/gpjh4lrotyephiqpuldtxxizrsg6job7cvhiqrw72saz2ubs3h@g6fgbvexgl3r/ Signed-off-by: Eric Dumazet Tested-by: Michal Kubecek Cc: Sabrina Dubroca Cc: Florian Westphal Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20251015052715.4140493-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/core/skbuff.c | 1 + net/ipv4/udp.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bc12790017b0..6be01454f262 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -7200,6 +7200,7 @@ nodefer: kfree_skb_napi_cache(skb); DEBUG_NET_WARN_ON_ONCE(skb_dst(skb)); DEBUG_NET_WARN_ON_ONCE(skb->destructor); + DEBUG_NET_WARN_ON_ONCE(skb_nfct(skb)); sdn = per_cpu_ptr(net_hotdata.skb_defer_nodes, cpu) + numa_node_id(); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 95241093b7f0..30dfbf73729d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1851,8 +1851,6 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) sk_peek_offset_bwd(sk, len); if (!skb_shared(skb)) { - if (unlikely(udp_skb_has_head_state(skb))) - skb_release_head_state(skb); skb_attempt_defer_free(skb); return; } -- cgit v1.2.3 From ed80cc4667ac997b84546e6d35f0a0ae525d239c Mon Sep 17 00:00:00 2001 From: Stuart Hayhurst Date: Mon, 6 Oct 2025 02:05:49 +0100 Subject: HID: logitech-hidpp: Add HIDPP_QUIRK_RESET_HI_RES_SCROLL The Logitech G502 Hero Wireless's high resolution scrolling resets after being unplugged without notifying the driver, causing extremely slow scrolling. The only indication of this is a battery update packet, so add a quirk to detect when the device is unplugged and re-enable the scrolling. 
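A note on why this takes a work item (rationale inferred from the code below, not stated explicitly): the unplug is only observable as a battery offline->online transition in the raw event path, which cannot sleep, while re-enabling high-resolution scrolling means sending HID++ feature commands and waiting for their replies, so the re-init is deferred:

    /* In the raw event handler: detect re-connect, defer the re-init */
    if (last_online == 0 && hidpp->battery.online == 1)
            schedule_work(&hidpp->reset_hi_res_work);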
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218037 Signed-off-by: Stuart Hayhurst Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index aaef405a717e..5e763de4b94f 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -75,6 +75,7 @@ MODULE_PARM_DESC(disable_tap_to_click, #define HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS BIT(27) #define HIDPP_QUIRK_HI_RES_SCROLL_1P0 BIT(28) #define HIDPP_QUIRK_WIRELESS_STATUS BIT(29) +#define HIDPP_QUIRK_RESET_HI_RES_SCROLL BIT(30) /* These are just aliases for now */ #define HIDPP_QUIRK_KBD_SCROLL_WHEEL HIDPP_QUIRK_HIDPP_WHEELS @@ -193,6 +194,7 @@ struct hidpp_device { void *private_data; struct work_struct work; + struct work_struct reset_hi_res_work; struct kfifo delayed_work_fifo; struct input_dev *delayed_input; @@ -3836,6 +3838,7 @@ static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data, struct hidpp_report *answer = hidpp->send_receive_buf; struct hidpp_report *report = (struct hidpp_report *)data; int ret; + int last_online; /* * If the mutex is locked then we have a pending answer from a @@ -3877,6 +3880,7 @@ static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data, "See: https://gitlab.freedesktop.org/jwrdegoede/logitech-27mhz-keyboard-encryption-setup/\n"); } + last_online = hidpp->battery.online; if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP20_BATTERY) { ret = hidpp20_battery_event_1000(hidpp, data, size); if (ret != 0) @@ -3901,6 +3905,11 @@ static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data, return ret; } + if (hidpp->quirks & HIDPP_QUIRK_RESET_HI_RES_SCROLL) { + if (last_online == 0 && hidpp->battery.online == 1) + schedule_work(&hidpp->reset_hi_res_work); + } + if (hidpp->quirks & HIDPP_QUIRK_HIDPP_WHEELS) { ret = hidpp10_wheel_raw_event(hidpp, data, size); if (ret != 0) @@ -4274,6 +4283,13 @@ static void hidpp_connect_event(struct work_struct *work) hidpp->delayed_input = input; } +static void hidpp_reset_hi_res_handler(struct work_struct *work) +{ + struct hidpp_device *hidpp = container_of(work, struct hidpp_device, reset_hi_res_work); + + hi_res_scroll_enable(hidpp); +} + static DEVICE_ATTR(builtin_power_supply, 0000, NULL, NULL); static struct attribute *sysfs_attrs[] = { @@ -4404,6 +4420,7 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) } INIT_WORK(&hidpp->work, hidpp_connect_event); + INIT_WORK(&hidpp->reset_hi_res_work, hidpp_reset_hi_res_handler); mutex_init(&hidpp->send_mutex); init_waitqueue_head(&hidpp->wait); @@ -4499,6 +4516,7 @@ static void hidpp_remove(struct hid_device *hdev) hid_hw_stop(hdev); cancel_work_sync(&hidpp->work); + cancel_work_sync(&hidpp->reset_hi_res_work); mutex_destroy(&hidpp->send_mutex); } @@ -4546,6 +4564,9 @@ static const struct hid_device_id hidpp_devices[] = { { /* Keyboard MX5500 (Bluetooth-receiver in HID proxy mode) */ LDJ_DEVICE(0xb30b), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, + { /* Logitech G502 Lightspeed Wireless Gaming Mouse */ + LDJ_DEVICE(0x407f), + .driver_data = HIDPP_QUIRK_RESET_HI_RES_SCROLL }, { LDJ_DEVICE(HID_ANY_ID) }, -- cgit v1.2.3 From 0c1999ed33722f85476a248186d6e0eb2bf3dd2a Mon Sep 17 00:00:00 2001 From: Xing Guo Date: Thu, 16 Oct 2025 11:53:30 +0800 Subject: selftests: arg_parsing: Ensure data is flushed to disk before reading. 
test_parse_test_list_file writes some data to /tmp/bpf_arg_parsing_test.XXXXXX and parse_test_list_file() will read the data back. However, after writing data to that file, we forgot to call fsync(), which causes test failures on my laptop. This patch fixes that by adding the missing fsync() call. Fixes: 64276f01dce8 ("selftests/bpf: Test_progs can read test lists from file") Signed-off-by: Xing Guo Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20251016035330.3217145-1-higuoxing@gmail.com --- tools/testing/selftests/bpf/prog_tests/arg_parsing.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c index fbf0d9c2f58b..e27d66b75fb1 100644 --- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c +++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c @@ -144,6 +144,9 @@ static void test_parse_test_list_file(void) if (!ASSERT_OK(ferror(fp), "prepare tmp")) goto out_fclose; + if (!ASSERT_OK(fsync(fileno(fp)), "fsync tmp")) + goto out_fclose; + init_test_filter_set(&set); if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file")) -- cgit v1.2.3 From f6fddc6df3fc0cffce329b87927db4eb5989728d Mon Sep 17 00:00:00 2001 From: Shardul Bankar Date: Thu, 16 Oct 2025 12:03:30 +0530 Subject: bpf: Fix memory leak in __lookup_instance error path When __lookup_instance() allocates a func_instance structure but fails to allocate the must_write_set array, it returns an error without freeing the previously allocated func_instance. This causes a memory leak of 192 bytes (sizeof(struct func_instance)) each time this error path is triggered. Fix by freeing 'result' on must_write_set allocation failure. Fixes: b3698c356ad9 ("bpf: callchain sensitive stack liveness tracking using CFG") Reported-by: BPF Runtime Fuzzer (BRF) Signed-off-by: Shardul Bankar Signed-off-by: Martin KaFai Lau Acked-by: Eduard Zingerman Link: https://patch.msgid.link/20251016063330.4107547-1-shardulsb08@gmail.com --- kernel/bpf/liveness.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c index 3c611aba7f52..1e6538f59a78 100644 --- a/kernel/bpf/liveness.c +++ b/kernel/bpf/liveness.c @@ -195,8 +195,10 @@ static struct func_instance *__lookup_instance(struct bpf_verifier_env *env, return ERR_PTR(-ENOMEM); result->must_write_set = kvcalloc(subprog_sz, sizeof(*result->must_write_set), GFP_KERNEL_ACCOUNT); - if (!result->must_write_set) + if (!result->must_write_set) { + kvfree(result); return ERR_PTR(-ENOMEM); + } memcpy(&result->callchain, callchain, sizeof(*callchain)); result->insn_cnt = subprog_sz; hash_add(liveness->func_instances, &result->hl_node, key); -- cgit v1.2.3 From 5a869d017793399fd1d2609ff27e900534173eb3 Mon Sep 17 00:00:00 2001 From: Wilfred Mallawa Date: Fri, 10 Oct 2025 17:19:42 +1000 Subject: nvme/tcp: handle tls partially sent records in write_space() With TLS enabled, records that are encrypted and appended to the TLS TX list can fail to see a retry if the underlying TCP socket is busy, for example, hitting an EAGAIN from tcp_sendmsg_locked(). This is not known to the NVMe TCP driver, as the TLS layer successfully generated a record. Typically, the TLS write_space() callback would ensure such records are retried, but in the NVMe TCP Host driver, write_space() invokes nvme_tcp_write_space(). This causes a partially sent record in the TLS TX list to time out after not being retried.
This patch fixes the above by calling queue->write_space(), which calls into the TLS layer to retry any pending records. Fixes: be8e82caa685 ("nvme-tcp: enable TLS handshake upcall") Signed-off-by: Wilfred Mallawa Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1413788ca7d5..9a96df1a511c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1081,6 +1081,9 @@ static void nvme_tcp_write_space(struct sock *sk) queue = sk->sk_user_data; if (likely(queue && sk_stream_is_writeable(sk))) { clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + /* Ensure pending TLS partial records are retried */ + if (nvme_tcp_queue_tls(queue)) + queue->write_space(sk); queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } read_unlock_bh(&sk->sk_callback_lock); -- cgit v1.2.3 From 95af8155746cbbc9e97ceb16a4977c095da1eb40 Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Wed, 15 Oct 2025 15:16:11 +0530 Subject: drm/xe: Prevent runtime PM wake while reading rp0 frequency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rp0 frequency is a fused value that is read once during probe and then cached, so there’s no need to trigger a runtime wake when accessing rp0. Signed-off-by: Badal Nilawar Reviewed-by: Vinay Belgaumkar Reviewed-by: Karthik Poosa Link: https://lore.kernel.org/r/20251015094611.1468939-1-badal.nilawar@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_freq.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 4ff1b6b58d6b..701349251bbc 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -99,13 +99,8 @@ static ssize_t rp0_freq_show(struct kobject *kobj, { struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); - u32 freq; - xe_pm_runtime_get(dev_to_xe(dev)); - freq = xe_guc_pc_get_rp0_freq(pc); - xe_pm_runtime_put(dev_to_xe(dev)); - - return sysfs_emit(buf, "%d\n", freq); + return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rp0_freq(pc)); } static struct kobj_attribute attr_rp0_freq = __ATTR_RO(rp0_freq); -- cgit v1.2.3 From 4a9cb2eecc78fa9d388481762dd798fa770e1971 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 10 Oct 2025 19:43:49 +0200 Subject: docs: rust: add section on imports formatting `rustfmt`, by default, formats imports in a way that is prone to conflicts while merging and rebasing, since in some cases it condenses several items into the same line. For instance, Linus mentioned [1] that the following case: use crate::{ fmt, page::AsPageIter, }; is compressed by `rustfmt` into: use crate::{fmt, page::AsPageIter}; which is undesirable. Similarly, `rustfmt` may put several items in the same line even if the braces span already multiple lines, e.g.: use kernel::{ acpi, c_str, device::{property, Core}, of, platform, }; The options that control the formatting behavior around imports are generally unstable, and `rustfmt` releases do not allow to use nightly features, unlike the compiler and other Rust tooling [2]. For the moment, we can introduce a workaround to prevent `rustfmt` from compressing the example above -- the "trailing empty comment": use crate::{ fmt, page::AsPageIter, // }; which is reminiscent of the trailing comma behavior in other formatters. 
We already used empty comments for formatting purposes in the past, e.g. in commit b9b701fce49a ("rust: clarify the language unstable features in use"). In addition, `rustfmt` actually reformats with a vertical layout (i.e. it does not put two items in the same line) when seeing such a comment, i.e. it doesn't just preserve the formatting, which is good in the sense that we can use it to easily reformat some imports, since it matches the style we generally want to have. A Git merge driver would help (suggested by Gary and Wedson), though maintainers would need to set it up, the diffs would still be larger and the formatting rules for imports would remain hard to predict. Thus document the style that we will follow in the coding guidelines by introducing a new section and explain how the trailing empty comment works there too. We discussed the issue with upstream Rust in our usual Rust <-> Rust for Linux meeting [3], and there have also been a few other discussions in parallel in issues [4][5] and Zulip [6]. We will see what happens, but upstream Rust has already created a subteam of `rustfmt` to try to overcome the bandwidth issue [7], which is a good signal, and some organization work has already started (e.g. tracking issues). We will continue our discussions with them about it. Cc: Caleb Cartwright Cc: Yacin Tmimi Cc: Manish Goregaokar Cc: Deadbeef Cc: Cameron Steffen Cc: Jieyou Xu Link: https://lore.kernel.org/all/CAHk-=wgO7S_FZUSBbngG5vtejWOpzDfTTBkVvP3_yjJmFddbzA@mail.gmail.com/ [1] Link: https://github.com/rust-lang/rustfmt/issues/4884 [2] Link: https://hackmd.io/iSCyY3JTTz-g8YM-nnzTTA [3] Link: https://github.com/rust-lang/rustfmt/issues/4991 [4] Link: https://github.com/rust-lang/rustfmt/issues/3361 [5] Link: https://rust-lang.zulipchat.com/#narrow/channel/392734-council/topic/rustfmt.20maintenance/near/543815381 [6] Link: https://github.com/rust-lang/team/pull/2017 [7] Reviewed-by: Benno Lossin Signed-off-by: Miguel Ojeda --- Documentation/rust/coding-guidelines.rst | 75 ++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/Documentation/rust/coding-guidelines.rst b/Documentation/rust/coding-guidelines.rst index 6ff9e754755d..3198be3a6d63 100644 --- a/Documentation/rust/coding-guidelines.rst +++ b/Documentation/rust/coding-guidelines.rst @@ -38,6 +38,81 @@ Like ``clang-format`` for the rest of the kernel, ``rustfmt`` works on individual files, and does not require a kernel configuration. Sometimes it may even work with broken code. +Imports +~~~~~~~ + +``rustfmt``, by default, formats imports in a way that is prone to conflicts +while merging and rebasing, since in some cases it condenses several items into +the same line. For instance: + +.. code-block:: rust + + // Do not use this style. + use crate::{ + example1, + example2::{example3, example4, example5}, + example6, example7, + example8::example9, + }; + +Instead, the kernel uses a vertical layout that looks like this: + +.. code-block:: rust + + use crate::{ + example1, + example2::{ + example3, + example4, + example5, // + }, + example6, + example7, + example8::example9, // + }; + +That is, each item goes into its own line, and braces are used as soon as there +is more than one item in a list. + +The trailing empty comment allows to preserve this formatting. Not only that, +``rustfmt`` will actually reformat imports vertically when the empty comment is +added. That is, it is possible to easily reformat the original example into the +expected style by running ``rustfmt`` on an input like: + +.. 
code-block:: rust + + // Do not use this style. + use crate::{ + example1, + example2::{example3, example4, example5, // + }, + example6, example7, + example8::example9, // + }; + +The trailing empty comment works for nested imports, as shown above, as well as +for single item imports -- this can be useful to minimize diffs within patch +series: + +.. code-block:: rust + + use crate::{ + example1, // + }; + +The trailing empty comment works in any of the lines within the braces, but it +is preferred to keep it in the last item, since it is reminiscent of the +trailing comma in other formatters. Sometimes it may be simpler to avoid moving +the comment several times within a patch series due to changes in the list. + +There may be cases where exceptions may need to be made, i.e. none of this is +a hard rule. There is also code that is not migrated to this style yet, but +please do not introduce code in other styles. + +Eventually, the goal is to get ``rustfmt`` to support this formatting style (or +a similar one) automatically in a stable release without requiring the trailing +empty comment. Thus, at some point, the goal is to remove those comments. + Comments -------- -- cgit v1.2.3 From 8a7c601e14576a22c2bbf7f67455ccf3f3d2737f Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 10 Oct 2025 19:43:50 +0200 Subject: rust: alloc: employ a trailing comment to keep vertical layout Apply the formatting guidelines introduced in the previous commit to make the file `rustfmt`-clean again. Reviewed-by: Benno Lossin Signed-off-by: Miguel Ojeda --- rust/kernel/alloc/kvec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index e94aebd084c8..ac8d6f763ae8 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -9,7 +9,7 @@ use super::{ }; use crate::{ fmt, - page::AsPageIter, + page::AsPageIter, // }; use core::{ borrow::{Borrow, BorrowMut}, -- cgit v1.2.3 From 32f072d9eaf9c31c2b0527a4a3370570a731e3cc Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 10 Oct 2025 19:43:51 +0200 Subject: rust: cpufreq: fix formatting We do our best to keep the repository `rustfmt`-clean, thus run the tool to fix the formatting issue. Link: https://docs.kernel.org/rust/coding-guidelines.html#style-formatting Link: https://rust-for-linux.com/contributing#submit-checklist-addendum Fixes: f97aef092e19 ("cpufreq: Make drivers using CPUFREQ_ETERNAL specify transition latency") Acked-by: Viresh Kumar Reviewed-by: Benno Lossin Signed-off-by: Miguel Ojeda --- rust/kernel/cpufreq.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index 21b5b9b8acc1..1a555fcb120a 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -38,8 +38,7 @@ use macros::vtable; const CPUFREQ_NAME_LEN: usize = bindings::CPUFREQ_NAME_LEN as usize; /// Default transition latency value in nanoseconds. -pub const DEFAULT_TRANSITION_LATENCY_NS: u32 = - bindings::CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; +pub const DEFAULT_TRANSITION_LATENCY_NS: u32 = bindings::CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; /// CPU frequency driver flags. 
pub mod flags { -- cgit v1.2.3 From bce13d6ecd6c0e6c3376ac53617b30d19ce2a5c6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 15 Oct 2025 14:03:20 +0200 Subject: drm/gpusvm, drm/xe: Allow mixed mappings for userptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compute kernels often issue memory copies immediately after completion. If the memory being copied is an SVM pointer that was faulted into the device and then bound via userptr, it is undesirable to move that memory. Worse, if userptr is mixed between system and device memory, the bind operation may be rejected. Xe already has the necessary plumbing to support userptr with mixed mappings. This update modifies GPUSVM's get_pages to correctly locate pages in such mixed mapping scenarios. v2: - Rebase (Thomas Hellström) v3: - Remove Fixes tag. v4: - Break out from series since the other patch was merged. - Update patch subject, ensure dri-devel and Maarten are CC'd. Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Reviewed-by: Matthew Auld Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20251015120320.176338-1-thomas.hellstrom@linux.intel.com Acked-by: Maarten Lankhorst Signed-off-by: Thomas Hellström --- drivers/gpu/drm/drm_gpusvm.c | 6 ++++-- drivers/gpu/drm/xe/xe_userptr.c | 4 +++- include/drm/drm_gpusvm.h | 4 ++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index cb906765897e..73e550c8ff8c 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1363,7 +1363,8 @@ map_pages: order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); if (is_device_private_page(page) || is_device_coherent_page(page)) { - if (zdd != page->zone_device_data && i > 0) { + if (!ctx->allow_mixed && + zdd != page->zone_device_data && i > 0) { err = -EOPNOTSUPP; goto err_unmap; } @@ -1399,7 +1400,8 @@ map_pages: } else { dma_addr_t addr; - if (is_zone_device_page(page) || pagemap) { + if (is_zone_device_page(page) || + (pagemap && !ctx->allow_mixed)) { err = -EOPNOTSUPP; goto err_unmap; } diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c index f16e92cd8090..0d9130b1958a 100644 --- a/drivers/gpu/drm/xe/xe_userptr.c +++ b/drivers/gpu/drm/xe/xe_userptr.c @@ -3,6 +3,7 @@ * Copyright © 2025 Intel Corporation */ +#include "xe_svm.h" #include "xe_userptr.h" #include @@ -54,7 +55,8 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) struct xe_device *xe = vm->xe; struct drm_gpusvm_ctx ctx = { .read_only = xe_vma_read_only(vma), - .device_private_page_owner = NULL, + .device_private_page_owner = xe_svm_devm_owner(xe), + .allow_mixed = true, }; lockdep_assert_held(&vm->lock); diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index b92faa9a26b2..632e100e6efb 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -235,6 +235,9 @@ struct drm_gpusvm { * @read_only: operating on read-only memory * @devmem_possible: possible to use device memory * @devmem_only: use only device memory + * @allow_mixed: Allow mixed mappings in get pages. Mixing between system and + * single dpagemap is supported, mixing between multiple dpagemap + * is unsupported. * * Context that is DRM GPUSVM is operating in (i.e. user arguments). 
*/ @@ -246,6 +249,7 @@ struct drm_gpusvm_ctx { unsigned int read_only :1; unsigned int devmem_possible :1; unsigned int devmem_only :1; + unsigned int allow_mixed :1; }; int drm_gpusvm_init(struct drm_gpusvm *gpusvm, -- cgit v1.2.3 From b3af8658ec70f2196190c66103478352286aba3b Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 15 Oct 2025 19:07:25 +0200 Subject: drm/xe: Retain vma flags when recreating and splitting vmas for madvise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When splitting and restoring vmas for madvise, we only copied the XE_VMA_SYSTEM_ALLOCATOR flag. That meant we lost flags for read_only, dumpable and sparse (in case anyone would call madvise for the latter). Instead, define a mask of relevant flags and ensure all are replicated. To simplify this and make the code a bit less fragile, remove the conversion to VMA_CREATE flags and instead just pass around the gpuva flags after initial conversion from user-space. Fixes: a2eb8aec3ebe ("drm/xe: Reset VMA attributes to default in SVM garbage collector") Cc: Matthew Brost Cc: Himal Prasad Ghimiray Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251015170726.178685-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_pt.c | 4 +- drivers/gpu/drm/xe/xe_vm.c | 86 ++++++++++++++-------------------------- drivers/gpu/drm/xe/xe_vm_types.h | 9 +---- 3 files changed, 32 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index e39da95f3a05..d22fd1ccc0ba 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2022,7 +2022,7 @@ static int op_prepare(struct xe_vm *vm, case DRM_GPUVA_OP_MAP: if ((!op->map.immediate && xe_vm_in_fault_mode(vm) && !op->map.invalidate_on_bind) || - op->map.is_cpu_addr_mirror) + (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) break; err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma, @@ -2252,7 +2252,7 @@ static void op_commit(struct xe_vm *vm, switch (op->base.op) { case DRM_GPUVA_OP_MAP: if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) || - op->map.is_cpu_addr_mirror) + (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) break; bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 08bc55bd91d7..c3230d3f9e6f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -642,6 +642,12 @@ static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, vops->pt_update_ops[i].num_ops += inc_val; } +#define XE_VMA_CREATE_MASK ( \ + XE_VMA_READ_ONLY | \ + XE_VMA_DUMPABLE | \ + XE_VMA_SYSTEM_ALLOCATOR | \ + DRM_GPUVA_SPARSE) + static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, u8 tile_mask) { @@ -654,8 +660,7 @@ static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, op->base.map.gem.offset = vma->gpuva.gem.offset; op->map.vma = vma; op->map.immediate = true; - op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; - op->map.is_null = xe_vma_is_null(vma); + op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK; } static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, @@ -958,11 +963,6 @@ static void xe_vma_free(struct xe_vma *vma) kfree(vma); } -#define VMA_CREATE_FLAG_READ_ONLY BIT(0) -#define VMA_CREATE_FLAG_IS_NULL BIT(1) -#define VMA_CREATE_FLAG_DUMPABLE BIT(2) -#define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3) - static struct xe_vma 
*xe_vma_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, @@ -973,11 +973,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_vma *vma; struct xe_tile *tile; u8 id; - bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); - bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); - bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); - bool is_cpu_addr_mirror = - (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR); + bool is_null = (flags & DRM_GPUVA_SPARSE); + bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR); xe_assert(vm->xe, start < end); xe_assert(vm->xe, end < vm->size); @@ -998,10 +995,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, if (!vma) return ERR_PTR(-ENOMEM); - if (is_cpu_addr_mirror) - vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR; - if (is_null) - vma->gpuva.flags |= DRM_GPUVA_SPARSE; if (bo) vma->gpuva.gem.obj = &bo->ttm.base; } @@ -1012,10 +1005,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->gpuva.vm = &vm->gpuvm; vma->gpuva.va.addr = start; vma->gpuva.va.range = end - start + 1; - if (read_only) - vma->gpuva.flags |= XE_VMA_READ_ONLY; - if (dumpable) - vma->gpuva.flags |= XE_VMA_DUMPABLE; + vma->gpuva.flags = flags; for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; @@ -2299,12 +2289,14 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, if (__op->op == DRM_GPUVA_OP_MAP) { op->map.immediate = flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; - op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; - op->map.is_cpu_addr_mirror = flags & - DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; - op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; + if (flags & DRM_XE_VM_BIND_FLAG_READONLY) + op->map.vma_flags |= XE_VMA_READ_ONLY; + if (flags & DRM_XE_VM_BIND_FLAG_NULL) + op->map.vma_flags |= DRM_GPUVA_SPARSE; + if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) + op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; + if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) + op->map.vma_flags |= XE_VMA_DUMPABLE; op->map.pat_index = pat_index; op->map.invalidate_on_bind = __xe_vm_needs_clear_scratch_pages(vm, flags); @@ -2617,14 +2609,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, .pat_index = op->map.pat_index, }; - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->map.is_null ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->map.dumpable ? - VMA_CREATE_FLAG_DUMPABLE : 0; - flags |= op->map.is_cpu_addr_mirror ? - VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; + flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; vma = new_vma(vm, &op->base.map, &default_attr, flags); @@ -2633,7 +2618,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, op->map.vma = vma; if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && - !op->map.is_cpu_addr_mirror) || + !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || op->map.invalidate_on_bind) xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); @@ -2664,18 +2649,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, op->remap.start = xe_vma_start(old); op->remap.range = xe_vma_size(old); - flags |= op->base.remap.unmap->va->flags & - XE_VMA_READ_ONLY ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->base.remap.unmap->va->flags & - DRM_GPUVA_SPARSE ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->base.remap.unmap->va->flags & - XE_VMA_DUMPABLE ? - VMA_CREATE_FLAG_DUMPABLE : 0; - flags |= xe_vma_is_cpu_addr_mirror(old) ? 
- VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; - + flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; if (op->base.remap.prev) { vma = new_vma(vm, op->base.remap.prev, &old->attr, flags); @@ -4239,7 +4213,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, struct xe_vma_ops vops; struct drm_gpuva_ops *ops = NULL; struct drm_gpuva_op *__op; - bool is_cpu_addr_mirror = false; + unsigned int vma_flags = 0; bool remap_op = false; struct xe_vma_mem_attr tmp_attr; u16 default_pat; @@ -4269,15 +4243,17 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, vma = gpuva_to_vma(op->base.unmap.va); XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma)); default_pat = vma->attr.default_pat_index; + vma_flags = vma->gpuva.flags; } if (__op->op == DRM_GPUVA_OP_REMAP) { vma = gpuva_to_vma(op->base.remap.unmap->va); default_pat = vma->attr.default_pat_index; + vma_flags = vma->gpuva.flags; } if (__op->op == DRM_GPUVA_OP_MAP) { - op->map.is_cpu_addr_mirror = true; + op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; op->map.pat_index = default_pat; } } else { @@ -4286,11 +4262,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, xe_assert(vm->xe, !remap_op); xe_assert(vm->xe, xe_vma_has_no_bo(vma)); remap_op = true; - - if (xe_vma_is_cpu_addr_mirror(vma)) - is_cpu_addr_mirror = true; - else - is_cpu_addr_mirror = false; + vma_flags = vma->gpuva.flags; } if (__op->op == DRM_GPUVA_OP_MAP) { @@ -4299,10 +4271,10 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, /* * In case of madvise ops DRM_GPUVA_OP_MAP is * always after DRM_GPUVA_OP_REMAP, so ensure - * we assign op->map.is_cpu_addr_mirror true - * if REMAP is for xe_vma_is_cpu_addr_mirror vma + * to propagate the flags from the vma we're + * unmapping. */ - op->map.is_cpu_addr_mirror = is_cpu_addr_mirror; + op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; } } print_op(vm->xe, __op); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 413353e1c225..a3b422b27ae8 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -345,17 +345,10 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; + unsigned int vma_flags; /** @immediate: Immediate bind */ bool immediate; /** @read_only: Read only */ - bool read_only; - /** @is_null: is NULL binding */ - bool is_null; - /** @is_cpu_addr_mirror: is CPU address mirror binding */ - bool is_cpu_addr_mirror; - /** @dumpable: whether BO is dumped on GPU hang */ - bool dumpable; - /** @invalidate: invalidate the VMA before bind */ bool invalidate_on_bind; /** @pat_index: The pat index to use for this operation. */ u16 pat_index; -- cgit v1.2.3 From 59a2d3f38ab23cce4cd9f0c4a5e08fdfe9e67ae7 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 15 Oct 2025 19:07:26 +0200 Subject: drm/xe/uapi: Hide the madvise autoreset behind a VM_BIND flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The madvise implementation currently resets the SVM madvise if the underlying CPU map is unmapped. This is in an attempt to mimic the CPU madvise behaviour. However, it's not clear that this is a desired behaviour since if the end app user relies on it for malloc()ed objects or stack objects, it may not work as intended. Instead of having the autoreset functionality being a direct application-facing implicit UAPI, make the UMD explicitly choose this behaviour if it wants to expose it by introducing DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET, and add a semantics description. v2: - Kerneldoc fixes. 
Fix a commit log message. Fixes: a2eb8aec3ebe ("drm/xe: Reset VMA attributes to default in SVM garbage collector") Cc: Matthew Brost Cc: Himal Prasad Ghimiray Cc: "Falkowski, John" Cc: "Mrozek, Michal" Signed-off-by: Thomas Hellström Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20251015170726.178685-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_svm.c | 5 +++++ drivers/gpu/drm/xe/xe_vm.c | 12 +++++++++--- drivers/gpu/drm/xe/xe_vm_types.h | 1 + include/uapi/drm/xe_drm.h | 15 +++++++++++++++ 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index da2a412f80c0..129e7818565c 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -302,6 +302,11 @@ static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 if (!vma) return -EINVAL; + if (!(vma->gpuva.flags & XE_VMA_MADV_AUTORESET)) { + drm_dbg(&vm->xe->drm, "Skipping madvise reset for vma.\n"); + return 0; + } + if (xe_vma_has_default_mem_attrs(vma)) return 0; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c3230d3f9e6f..10d77666a425 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -646,7 +646,8 @@ static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, XE_VMA_READ_ONLY | \ XE_VMA_DUMPABLE | \ XE_VMA_SYSTEM_ALLOCATOR | \ - DRM_GPUVA_SPARSE) + DRM_GPUVA_SPARSE | \ + XE_VMA_MADV_AUTORESET) static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, u8 tile_mask) @@ -2297,6 +2298,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) op->map.vma_flags |= XE_VMA_DUMPABLE; + if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) + op->map.vma_flags |= XE_VMA_MADV_AUTORESET; op->map.pat_index = pat_index; op->map.invalidate_on_bind = __xe_vm_needs_clear_scratch_pages(vm, flags); @@ -3280,7 +3283,8 @@ ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE | \ DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ - DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) + DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \ + DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) #ifdef TEST_VM_OPS_ERROR #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) @@ -3395,7 +3399,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && !(BIT(prefetch_region) & xe->info.mem_region_mask))) || XE_IOCTL_DBG(xe, obj && - op == DRM_XE_VM_BIND_OP_UNMAP)) { + op == DRM_XE_VM_BIND_OP_UNMAP) || + XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) && + (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) { err = -EINVAL; goto free_bind_ops; } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index a3b422b27ae8..d6e2a0fdd4b3 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -46,6 +46,7 @@ struct xe_vm_pgtable_update_op; #define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 7) #define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8) #define XE_VMA_SYSTEM_ALLOCATOR (DRM_GPUVA_USERBITS << 9) +#define XE_VMA_MADV_AUTORESET (DRM_GPUVA_USERBITS << 10) /** * struct xe_vma_mem_attr - memory attributes associated with vma diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 2d7945cda739..47853659a705 100644 --- a/include/uapi/drm/xe_drm.h +++ 
b/include/uapi/drm/xe_drm.h @@ -1017,6 +1017,20 @@ struct drm_xe_vm_destroy { * valid on VMs with DRM_XE_VM_CREATE_FLAG_FAULT_MODE set. The CPU address * mirror flag are only valid for DRM_XE_VM_BIND_OP_MAP operations, the BO * handle MBZ, and the BO offset MBZ. + * - %DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET - Can be used in combination with + * %DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR to reset madvises when the underlying + * CPU address space range is unmapped (typically with munmap(2) or brk(2)). + * The madvise values set with &DRM_IOCTL_XE_MADVISE are reset to the values + * that were present immediately after the &DRM_IOCTL_XE_VM_BIND. + * The reset GPU virtual address range is the intersection of the range bound + * using &DRM_IOCTL_XE_VM_BIND and the virtual CPU address space range + * unmapped. + * This functionality is present to mimic the behaviour of CPU address space + * madvises set using madvise(2), which are typically reset on unmap. + * Note: free(3) may or may not call munmap(2) and/or brk(2), and may thus + * not invoke autoreset. Neither will stack variables going out of scope. + * Therefore it's recommended to always explicitly reset the madvises when + * freeing the memory backing a region used in a &DRM_IOCTL_XE_MADVISE call. * * The @prefetch_mem_region_instance for %DRM_XE_VM_BIND_OP_PREFETCH can also be: * - %DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC, which ensures prefetching occurs in @@ -1123,6 +1137,7 @@ struct drm_xe_vm_bind_op { #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) #define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4) #define DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR (1 << 5) +#define DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET (1 << 6) /** @flags: Bind flags */ __u32 flags; -- cgit v1.2.3 From 1f1d3e1d094db732d22b892227bf1e1ac3a8ca04 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 17 Oct 2025 00:57:54 +0200 Subject: rust: bitmap: fix formatting We do our best to keep the repository `rustfmt`-clean, thus run the tool to fix the formatting issue. Link: https://docs.kernel.org/rust/coding-guidelines.html#style-formatting Link: https://rust-for-linux.com/contributing#submit-checklist-addendum Fixes: 0f5878834d6c ("rust: bitmap: clean Rust 1.92.0 `unused_unsafe` warning") Reviewed-by: Burak Emir Signed-off-by: Miguel Ojeda --- rust/kernel/bitmap.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rust/kernel/bitmap.rs b/rust/kernel/bitmap.rs index 711b8368b38f..aa8fc7bf06fc 100644 --- a/rust/kernel/bitmap.rs +++ b/rust/kernel/bitmap.rs @@ -167,7 +167,9 @@ impl core::ops::Deref for BitmapVec { let ptr = if self.nbits <= BITS_PER_LONG { // SAFETY: Bitmap is represented inline. #[allow(unused_unsafe, reason = "Safe since Rust 1.92.0")] - unsafe { core::ptr::addr_of!(self.repr.bitmap) } + unsafe { + core::ptr::addr_of!(self.repr.bitmap) + } } else { // SAFETY: Bitmap is represented as array of `unsigned long`. unsafe { self.repr.ptr.as_ptr() } @@ -184,7 +186,9 @@ impl core::ops::DerefMut for BitmapVec { let ptr = if self.nbits <= BITS_PER_LONG { // SAFETY: Bitmap is represented inline. #[allow(unused_unsafe, reason = "Safe since Rust 1.92.0")] - unsafe { core::ptr::addr_of_mut!(self.repr.bitmap) } + unsafe { + core::ptr::addr_of_mut!(self.repr.bitmap) + } } else { // SAFETY: Bitmap is represented as array of `unsigned long`. 
unsafe { self.repr.ptr.as_ptr() } -- cgit v1.2.3 From e433110eb5bf067f74d3d15c5fb252206c66ae0b Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Tue, 14 Oct 2025 09:46:07 +0800 Subject: PCI: vmd: Override irq_startup()/irq_shutdown() in vmd_init_dev_msi_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 54f45a30c0d0 ("PCI/MSI: Add startup/shutdown for per device domains") set the irq_startup() and irq_shutdown() callbacks of the struct pci_msi[x]_template, __irq_startup() will always invoke the irq_startup() callback instead of the irq_enable() callback overridden in vmd_init_dev_msi_info(). This will not start the IRQ correctly. Also override irq_startup()/irq_shutdown() in vmd_init_dev_msi_info(), so that irq_startup() can invoke the real logic. Fixes: 54f45a30c0d0 ("PCI/MSI: Add startup/shutdown for per device domains") Reported-by: Kenneth Crudup Closes: https://lore.kernel.org/r/8a923590-5b3a-406f-a324-7bd1cf894d8f@panix.com/ Reported-by: Genes Lists Closes: https://lore.kernel.org/r/4b392af8847cc19720ffcd53865f60ab3edc56b3.camel@sapience.com Reported-by: Todd Brandt Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220658 Reported-by: Oliver Hartkopp Closes: https://lore.kernel.org/r/8d6887a5-60bc-423c-8f7a-87b4ab739f6a@hartkopp.net Reported-by: Hervé Signed-off-by: Inochi Amaoto Signed-off-by: Bjorn Helgaas Tested-by: Kenneth R. Crudup Tested-by: Genes Lists Tested-by: Oliver Hartkopp Tested-by: Todd Brandt Tested-by: Hervé Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251014014607.612586-1-inochiama@gmail.com --- drivers/pci/controller/vmd.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 1bd5bf4a6097..b4b62b9ccc45 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -192,6 +192,12 @@ static void vmd_pci_msi_enable(struct irq_data *data) data->chip->irq_unmask(data); } +static unsigned int vmd_pci_msi_startup(struct irq_data *data) +{ + vmd_pci_msi_enable(data); + return 0; +} + static void vmd_irq_disable(struct irq_data *data) { struct vmd_irq *vmdirq = data->chip_data; @@ -210,6 +216,11 @@ static void vmd_pci_msi_disable(struct irq_data *data) vmd_irq_disable(data->parent_data); } +static void vmd_pci_msi_shutdown(struct irq_data *data) +{ + vmd_pci_msi_disable(data); +} + static struct irq_chip vmd_msi_controller = { .name = "VMD-MSI", .irq_compose_msi_msg = vmd_compose_msi_msg, @@ -309,6 +320,8 @@ static bool vmd_init_dev_msi_info(struct device *dev, struct irq_domain *domain, if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) return false; + info->chip->irq_startup = vmd_pci_msi_startup; + info->chip->irq_shutdown = vmd_pci_msi_shutdown; info->chip->irq_enable = vmd_pci_msi_enable; info->chip->irq_disable = vmd_pci_msi_disable; return true; -- cgit v1.2.3 From a78835b86a4414230e4cf9a9f16d22302cdb8388 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Mon, 13 Oct 2025 17:08:26 -0500 Subject: PCI/VGA: Select SCREEN_INFO on X86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 337bf13aa9dda ("PCI/VGA: Replace vga_is_firmware_default() with a screen info check") introduced an implicit dependency upon SCREEN_INFO by removing the open-coded implementation. If a user didn't have CONFIG_SCREEN_INFO set, vga_is_firmware_default() would now return false.
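As a minimal sketch (illustrative only, not part of the patch), with CONFIG_SCREEN_INFO unset the preprocessor reduced the helper to:

	static bool vga_is_firmware_default(struct pci_dev *pdev)
	{
		return false;	/* the firmware-default GPU is never detected */
	}

so the VGA arbiter silently lost the ability to identify the boot display.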
SCREEN_INFO is only used on X86, so add a conditional select for SCREEN_INFO to ensure that the VGA arbiter works as intended. Fixes: 337bf13aa9dda ("PCI/VGA: Replace vga_is_firmware_default() with a screen info check") Reported-by: Eric Biggers Closes: https://lore.kernel.org/linux-pci/20251012182302.GA3412@sol/ Suggested-by: Thomas Zimmermann Signed-off-by: Mario Limonciello (AMD) Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Zimmermann Reviewed-by: Ilpo Järvinen Tested-by: Eric Biggers Link: https://patch.msgid.link/20251013220829.1536292-1-superm1@kernel.org --- drivers/pci/Kconfig | 1 + drivers/pci/vgaarb.c | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 7065a8e5f9b1..f94f5d384362 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -306,6 +306,7 @@ config VGA_ARB bool "VGA Arbitration" if EXPERT default y depends on (PCI && !S390) + select SCREEN_INFO if X86 help Some "legacy" VGA devices implemented on PCI typically have the same hard-decoded addresses as they did on ISA. When multiple PCI devices diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index b58f94ee4891..436fa7f4c387 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -556,10 +556,8 @@ EXPORT_SYMBOL(vga_put); static bool vga_is_firmware_default(struct pci_dev *pdev) { -#ifdef CONFIG_SCREEN_INFO - struct screen_info *si = &screen_info; - - return pdev == screen_info_pci_dev(si); +#if defined CONFIG_X86 + return pdev == screen_info_pci_dev(&screen_info); #else return false; #endif -- cgit v1.2.3 From 2a786348004b34c5f61235d51c40c1c718b1f8f9 Mon Sep 17 00:00:00 2001 From: Dawn Gardner Date: Thu, 16 Oct 2025 15:42:06 -0300 Subject: ALSA: hda/realtek: Fix mute led for HP Omen 17-cb0xxx This laptop uses the ALC285 codec; fix the mute LED by enabling the ALC285_FIXUP_HP_MUTE_LED quirk. Signed-off-by: Dawn Gardner Link: https://patch.msgid.link/20251016184218.31508-3-dawn.auroali@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 6b9e5c091bf3..8ad5febd822a 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6397,6 +6397,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x854a, "HP EliteBook 830 G6", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x85c6, "HP Pavilion x360 Convertible 14-dy1xxx", ALC295_FIXUP_HP_MUTE_LED_COEFBIT11), SND_PCI_QUIRK(0x103c, 0x85de, "HP Envy x360 13-ar0xxx", ALC285_FIXUP_HP_ENVY_X360), + SND_PCI_QUIRK(0x103c, 0x8603, "HP Omen 17-cb0xxx", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x860c, "HP ZBook 17 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), -- cgit v1.2.3 From 37c8c8d9c6ad0b34b662074b66fbe0dcde9d1fed Mon Sep 17 00:00:00 2001 From: Lukasz Laguna Date: Thu, 16 Oct 2025 14:22:33 +0200 Subject: drm/xe/pf: Always expose VRAM provisioning data on discrete GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, VRAM provisioning data is only exposed if the device supports LMTT. While it should not be possible to modify VRAM provisioning on platforms without LMTT, it is still useful to be able to read the VRAM provisioning data on all discrete GPU platforms.
Expose the VRAM debugfs attributes whenever running on dGFX, adjusting file permissions to read-only when LMTT is not available. Signed-off-by: Lukasz Laguna Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251016122233.3789-1-lukasz.laguna@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 3 ++- drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index b2e5c52978e6..c0c0215c0703 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1548,7 +1548,8 @@ int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size { int err; - xe_gt_assert(gt, xe_device_has_lmtt(gt_to_xe(gt))); + if (!xe_device_has_lmtt(gt_to_xe(gt))) + return -EPERM; mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); if (vfid) diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c index c8df18af4d00..6ba2332c77d4 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c @@ -165,14 +165,17 @@ DEFINE_SRIOV_TILE_CONFIG_DEBUGFS_ATTRIBUTE(vram, lmem, u64, "%llu\n"); static void pf_add_config_attrs(struct xe_tile *tile, struct dentry *dent, unsigned int vfid) { + struct xe_device *xe = tile->xe; + xe_tile_assert(tile, tile == extract_tile(dent)); xe_tile_assert(tile, vfid == extract_vfid(dent)); debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare", 0644, dent, dent, &ggtt_fops); - if (xe_device_has_lmtt(tile->xe)) + if (IS_DGFX(xe)) debugfs_create_file_unsafe(vfid ? "vram_quota" : "vram_spare", - 0644, dent, dent, &vram_fops); + xe_device_has_lmtt(xe) ? 0644 : 0444, + dent, dent, &vram_fops); } static void pf_populate_tile(struct xe_tile *tile, struct dentry *dent, unsigned int vfid) @@ -188,7 +191,7 @@ static void pf_populate_tile(struct xe_tile *tile, struct dentry *dent, unsigned drm_debugfs_create_files(pf_ggtt_info, ARRAY_SIZE(pf_ggtt_info), dent, minor); - if (xe_device_has_lmtt(xe)) + if (IS_DGFX(xe)) drm_debugfs_create_files(pf_vram_info, ARRAY_SIZE(pf_vram_info), dent, minor); -- cgit v1.2.3 From a5efeaf8a1dbb007ecc6506cf4d532faaf9d02c5 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 15 Oct 2025 11:12:06 +0200 Subject: drm/xe/pf: Promote VFs provisioning helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we plan to add more VFs provisioning methods, start moving related code into a single place.
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20251015091211.592-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_pci_sriov.c | 45 ++-------------- drivers/gpu/drm/xe/xe_sriov_pf_provision.c | 83 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf_provision.h | 14 +++++ 4 files changed, 102 insertions(+), 41 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf_provision.c create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf_provision.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index f4b7b2bcaa59..3fbec058facc 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -176,6 +176,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_sriov_pf.o \ xe_sriov_pf_control.o \ xe_sriov_pf_debugfs.o \ + xe_sriov_pf_provision.o \ xe_sriov_pf_service.o \ xe_tile_sriov_pf_debugfs.o diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 9c1c9e669b04..735f51effc7a 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -19,46 +19,9 @@ #include "xe_sriov_pf.h" #include "xe_sriov_pf_control.h" #include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_provision.h" #include "xe_sriov_printk.h" -static int pf_needs_provisioning(struct xe_gt *gt, unsigned int num_vfs) -{ - unsigned int n; - - for (n = 1; n <= num_vfs; n++) - if (!xe_gt_sriov_pf_config_is_empty(gt, n)) - return false; - - return true; -} - -static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) -{ - struct xe_gt *gt; - unsigned int id; - int result = 0, err; - - for_each_gt(gt, xe, id) { - if (!pf_needs_provisioning(gt, num_vfs)) - continue; - err = xe_gt_sriov_pf_config_set_fair(gt, VFID(1), num_vfs); - result = result ?: err; - } - - return result; -} - -static void pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) -{ - struct xe_gt *gt; - unsigned int id; - unsigned int n; - - for_each_gt(gt, xe, id) - for (n = 1; n <= num_vfs; n++) - xe_gt_sriov_pf_config_release(gt, n, true); -} - static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) { unsigned int n; @@ -168,7 +131,7 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) */ xe_pm_runtime_get_noresume(xe); - err = pf_provision_vfs(xe, num_vfs); + err = xe_sriov_pf_provision_vfs(xe, num_vfs); if (err < 0) goto failed; @@ -192,7 +155,7 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) return num_vfs; failed: - pf_unprovision_vfs(xe, num_vfs); + xe_sriov_pf_unprovision_vfs(xe, num_vfs); xe_pm_runtime_put(xe); out: xe_sriov_notice(xe, "Failed to enable %u VF%s (%pe)\n", @@ -218,7 +181,7 @@ static int pf_disable_vfs(struct xe_device *xe) pf_reset_vfs(xe, num_vfs); - pf_unprovision_vfs(xe, num_vfs); + xe_sriov_pf_unprovision_vfs(xe, num_vfs); /* not needed anymore - see pf_enable_vfs() */ xe_pm_runtime_put(xe); diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c new file mode 100644 index 000000000000..4895d0a70089 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_gt_sriov_pf_config.h" +#include "xe_sriov.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_provision.h" + +static bool pf_needs_provisioning(struct xe_gt *gt, unsigned int num_vfs) +{ + unsigned int n; + + for (n = 1; n <= num_vfs; n++) + if 
(!xe_gt_sriov_pf_config_is_empty(gt, n)) + return false; + + return true; +} + +static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + if (!pf_needs_provisioning(gt, num_vfs)) + continue; + err = xe_gt_sriov_pf_config_set_fair(gt, VFID(1), num_vfs); + result = result ?: err; + } + + return result; +} + +static void pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + struct xe_gt *gt; + unsigned int id; + unsigned int n; + + for_each_gt(gt, xe, id) + for (n = 1; n <= num_vfs; n++) + xe_gt_sriov_pf_config_release(gt, n, true); +} + +/** + * xe_sriov_pf_provision_vfs() - Provision VFs in auto-mode. + * @xe: the PF &xe_device + * @num_vfs: the number of VFs to auto-provision + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + return pf_provision_vfs(xe, num_vfs); +} + +/** + * xe_sriov_pf_unprovision_vfs() - Unprovision VFs in auto-mode. + * @xe: the PF &xe_device + * @num_vfs: the number of VFs to unprovision + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + pf_unprovision_vfs(xe, num_vfs); + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h new file mode 100644 index 000000000000..f6a902190ad7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_PROVISION_H_ +#define _XE_SRIOV_PF_PROVISION_H_ + +struct xe_device; + +int xe_sriov_pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs); +int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs); + +#endif -- cgit v1.2.3 From 5546bc207110d4b5c187f002097cde07d1926ab3 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 15 Oct 2025 11:12:07 +0200 Subject: drm/xe/pf: Automatically provision VFs only in auto-mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We shouldn't attempt to auto-provision VFs once we allow other provisioning methods. Make the code aware of the new condition. 
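As a minimal sketch (assumed semantics; the exact change is in the diff below), both helpers now bail out early unless the PF is still in the default auto-mode:

	if (!pf_auto_provisioning_mode(xe))
		return 0;	/* another provisioning method owns the config */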
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20251015091211.592-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov_pf_provision.c | 16 +++++++++++- drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h | 32 ++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf_types.h | 4 +++ 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c index 4895d0a70089..1e782a96bbdd 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c @@ -9,6 +9,14 @@ #include "xe_sriov.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_pf_provision.h" +#include "xe_sriov_pf_provision_types.h" + +static bool pf_auto_provisioning_mode(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + return xe->sriov.pf.provision.mode == XE_SRIOV_PROVISIONING_MODE_AUTO; +} static bool pf_needs_provisioning(struct xe_gt *gt, unsigned int num_vfs) { @@ -30,7 +38,7 @@ static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) for_each_gt(gt, xe, id) { if (!pf_needs_provisioning(gt, num_vfs)) - continue; + return -EUCLEAN; err = xe_gt_sriov_pf_config_set_fair(gt, VFID(1), num_vfs); result = result ?: err; } @@ -62,6 +70,9 @@ int xe_sriov_pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) { xe_assert(xe, IS_SRIOV_PF(xe)); + if (!pf_auto_provisioning_mode(xe)) + return 0; + return pf_provision_vfs(xe, num_vfs); } @@ -78,6 +89,9 @@ int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) { xe_assert(xe, IS_SRIOV_PF(xe)); + if (!pf_auto_provisioning_mode(xe)) + return 0; + pf_unprovision_vfs(xe, num_vfs); return 0; } diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h new file mode 100644 index 000000000000..f72bc5db3a60 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_PROVISION_TYPES_H_ +#define _XE_SRIOV_PF_PROVISION_TYPES_H_ + +#include + +/** + * enum xe_sriov_provisioning_mode - SR-IOV provisioning mode. + * + * @XE_SRIOV_PROVISIONING_MODE_AUTO: VFs are provisioned during VFs enabling. + * Any allocated resources to the VFs will be + * automatically released when disabling VFs. + * This is a default mode. + */ +enum xe_sriov_provisioning_mode { + XE_SRIOV_PROVISIONING_MODE_AUTO, +}; +static_assert(XE_SRIOV_PROVISIONING_MODE_AUTO == 0); + +/** + * struct xe_sriov_pf_provision - Data used by the PF provisioning. + */ +struct xe_sriov_pf_provision { + /** @mode: selected provisioning mode. */ + enum xe_sriov_provisioning_mode mode; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h index 956a88f9f213..c753cd59aed2 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h @@ -9,6 +9,7 @@ #include #include +#include "xe_sriov_pf_provision_types.h" #include "xe_sriov_pf_service_types.h" /** @@ -35,6 +36,9 @@ struct xe_device_pf { /** @master_lock: protects all VFs configurations across GTs */ struct mutex master_lock; + /** @provision: device level provisioning data. */ + struct xe_sriov_pf_provision provision; + /** @service: device level service data. 
*/ struct xe_sriov_pf_service service; -- cgit v1.2.3 From b1767ca123ae073c0aec66973a1b70a95930455b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 15 Oct 2025 11:12:08 +0200 Subject: drm/xe/pf: Disable auto-provisioning if changed using debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we attempt to tweak VFs configurations using debugfs, stop assuming that VFs need auto-(un)provisioning. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20251015091211.592-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 7 +++++ drivers/gpu/drm/xe/xe_sriov_pf_provision.c | 39 ++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf_provision.h | 17 +++++++++++ drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h | 4 +++ drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c | 3 ++ 5 files changed, 70 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index c026a3910e7e..838beb7f6327 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -23,6 +23,7 @@ #include "xe_gt_sriov_pf_service.h" #include "xe_pm.h" #include "xe_sriov_pf.h" +#include "xe_sriov_pf_provision.h" /* * /sys/kernel/debug/dri/BDF/ @@ -124,6 +125,8 @@ static int POLICY##_set(void *data, u64 val) \ \ xe_pm_runtime_get(xe); \ err = xe_gt_sriov_pf_policy_set_##POLICY(gt, val); \ + if (!err) \ + xe_sriov_pf_provision_set_custom_mode(xe); \ xe_pm_runtime_put(xe); \ \ return err; \ @@ -189,6 +192,8 @@ static int CONFIG##_set(void *data, u64 val) \ xe_pm_runtime_get(xe); \ err = xe_sriov_pf_wait_ready(xe) ?: \ xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ + if (!err) \ + xe_sriov_pf_provision_set_custom_mode(xe); \ xe_pm_runtime_put(xe); \ \ return err; \ @@ -246,6 +251,8 @@ static int set_threshold(void *data, u64 val, enum xe_guc_klv_threshold_index in xe_pm_runtime_get(xe); err = xe_gt_sriov_pf_config_set_threshold(gt, vfid, index, val); + if (!err) + xe_sriov_pf_provision_set_custom_mode(xe); xe_pm_runtime_put(xe); return err; diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c index 1e782a96bbdd..23b89c4310ed 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c @@ -10,6 +10,19 @@ #include "xe_sriov_pf_helpers.h" #include "xe_sriov_pf_provision.h" #include "xe_sriov_pf_provision_types.h" +#include "xe_sriov_printk.h" + +static const char *mode_to_string(enum xe_sriov_provisioning_mode mode) +{ + switch (mode) { + case XE_SRIOV_PROVISIONING_MODE_AUTO: + return "auto"; + case XE_SRIOV_PROVISIONING_MODE_CUSTOM: + return "custom"; + default: + return ""; + } +} static bool pf_auto_provisioning_mode(struct xe_device *xe) { @@ -95,3 +108,29 @@ int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) pf_unprovision_vfs(xe, num_vfs); return 0; } + +/** + * xe_sriov_pf_provision_set_mode() - Change VFs provision mode. + * @xe: the PF &xe_device + * @mode: the new VFs provisioning mode + * + * When changing from AUTO to CUSTOM mode, any already allocated VFs resources + * will remain allocated and will not be released upon VFs disabling. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_provision_set_mode(struct xe_device *xe, enum xe_sriov_provisioning_mode mode) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + if (mode == xe->sriov.pf.provision.mode) + return 0; + + xe_sriov_dbg(xe, "mode %s changed to %s by %ps\n", + mode_to_string(xe->sriov.pf.provision.mode), + mode_to_string(mode), __builtin_return_address(0)); + xe->sriov.pf.provision.mode = mode; + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h index f6a902190ad7..cf3657a32e90 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h @@ -6,9 +6,26 @@ #ifndef _XE_SRIOV_PF_PROVISION_H_ #define _XE_SRIOV_PF_PROVISION_H_ +#include "xe_sriov_pf_provision_types.h" + struct xe_device; int xe_sriov_pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs); int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs); +int xe_sriov_pf_provision_set_mode(struct xe_device *xe, enum xe_sriov_provisioning_mode mode); + +/** + * xe_sriov_pf_provision_set_custom_mode() - Change VFs provision mode to custom. + * @xe: the PF &xe_device + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +static inline int xe_sriov_pf_provision_set_custom_mode(struct xe_device *xe) +{ + return xe_sriov_pf_provision_set_mode(xe, XE_SRIOV_PROVISIONING_MODE_CUSTOM); +} + #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h index f72bc5db3a60..a847b8a4c4da 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h @@ -15,9 +15,13 @@ * Any allocated resources to the VFs will be * automatically released when disabling VFs. * This is a default mode. + * @XE_SRIOV_PROVISIONING_MODE_CUSTOM: Explicit VFs provisioning using uABI interfaces. + * VFs resources remains allocated regardless if + * VFs are enabled or not. */ enum xe_sriov_provisioning_mode { XE_SRIOV_PROVISIONING_MODE_AUTO, + XE_SRIOV_PROVISIONING_MODE_CUSTOM, }; static_assert(XE_SRIOV_PROVISIONING_MODE_AUTO == 0); diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c index 6ba2332c77d4..f3f478f14ff5 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c @@ -15,6 +15,7 @@ #include "xe_tile_sriov_pf_debugfs.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" +#include "xe_sriov_pf_provision.h" /* * /sys/kernel/debug/dri/BDF/ @@ -143,6 +144,8 @@ static int NAME##_set(void *data, u64 val) \ xe_pm_runtime_get(xe); \ err = xe_sriov_pf_wait_ready(xe) ?: \ xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ + if (!err) \ + xe_sriov_pf_provision_set_custom_mode(xe); \ xe_pm_runtime_put(xe); \ \ return err; \ -- cgit v1.2.3 From ee74634683e4b3e526a4b014b0358796e97c7ce3 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 15 Oct 2025 11:12:09 +0200 Subject: drm/xe/pf: Allow to restore auto-provisioning mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After doing tweaks to the VFs provisioning we may want to restore the auto-provisioning mode. Allow that unless VFs are still enabled. This can also be used to release all VFs resources.
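A usage sketch (hedged; the debugfs path follows the layout added in the diff below):

	# echo 1 > /sys/kernel/debug/dri/<BDF>/sriov/restore_auto_provisioning

which inside the driver boils down to:

	ret = xe_sriov_pf_provision_set_mode(xe, XE_SRIOV_PROVISIONING_MODE_AUTO);
	if (ret == -EBUSY)
		;	/* VFs are still enabled: disable them first via sriov_numvfs */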
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20251015091211.592-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c | 63 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf_helpers.h | 11 ++++++ drivers/gpu/drm/xe/xe_sriov_pf_provision.c | 18 +++++++++ 3 files changed, 92 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c index 97636ed86fb8..a81aa05c5532 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c @@ -13,6 +13,7 @@ #include "xe_sriov_pf_control.h" #include "xe_sriov_pf_debugfs.h" #include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_provision.h" #include "xe_sriov_pf_service.h" #include "xe_sriov_printk.h" #include "xe_tile_sriov_pf_debugfs.h" @@ -43,6 +44,66 @@ static unsigned int extract_vfid(struct dentry *d) return p == extract_xe(d) ? PFID : (uintptr_t)p; } +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * │ ├── restore_auto_provisioning + * │ : + * │ ├── pf/ + * │ ├── vf1 + * │ │ ├── ... + */ + +static ssize_t from_file_write_to_xe_call(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos, + int (*call)(struct xe_device *)) +{ + struct dentry *dent = file_dentry(file); + struct xe_device *xe = extract_xe(dent); + bool yes; + int ret; + + if (*ppos) + return -EINVAL; + ret = kstrtobool_from_user(userbuf, count, &yes); + if (ret < 0) + return ret; + if (yes) { + xe_pm_runtime_get(xe); + ret = call(xe); + xe_pm_runtime_put(xe); + } + if (ret < 0) + return ret; + return count; +} + +#define DEFINE_SRIOV_ATTRIBUTE(OP) \ +static int OP##_show(struct seq_file *s, void *unused) \ +{ \ + return 0; \ +} \ +static ssize_t OP##_write(struct file *file, const char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + return from_file_write_to_xe_call(file, userbuf, count, ppos, \ + xe_sriov_pf_##OP); \ +} \ +DEFINE_SHOW_STORE_ATTRIBUTE(OP) + +static inline int xe_sriov_pf_restore_auto_provisioning(struct xe_device *xe) +{ + return xe_sriov_pf_provision_set_mode(xe, XE_SRIOV_PROVISIONING_MODE_AUTO); +} + +DEFINE_SRIOV_ATTRIBUTE(restore_auto_provisioning); + +static void pf_populate_root(struct xe_device *xe, struct dentry *dent) +{ + debugfs_create_file("restore_auto_provisioning", 0200, dent, xe, + &restore_auto_provisioning_fops); +} + static int simple_show(struct seq_file *m, void *data) { struct drm_printer p = drm_seq_file_printer(m); @@ -167,6 +228,8 @@ void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) return; dent->d_inode->i_private = xe; + pf_populate_root(xe, dent); + /* * /sys/kernel/debug/dri/BDF/ * ├── sriov # d_inode->i_private = (xe_device*) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h index dd1df950b021..4a4340fb633a 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h @@ -37,6 +37,17 @@ static inline int xe_sriov_pf_get_totalvfs(struct xe_device *xe) return xe->sriov.pf.driver_max_vfs; } +/** + * xe_sriov_pf_num_vfs() - Number of enabled VFs on the PF. + * @xe: the PF &xe_device + * + * Return: Number of enabled VFs on the PF. 
+ */ +static inline unsigned int xe_sriov_pf_num_vfs(const struct xe_device *xe) +{ + return pci_num_vf(to_pci_dev(xe->drm.dev)); +} + static inline struct mutex *xe_sriov_pf_master_mutex(struct xe_device *xe) { xe_assert(xe, IS_SRIOV_PF(xe)); diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c index 23b89c4310ed..663fb0c045e9 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c @@ -70,6 +70,11 @@ static void pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) xe_gt_sriov_pf_config_release(gt, n, true); } +static void pf_unprovision_all_vfs(struct xe_device *xe) +{ + pf_unprovision_vfs(xe, xe_sriov_pf_get_totalvfs(xe)); +} + /** * xe_sriov_pf_provision_vfs() - Provision VFs in auto-mode. * @xe: the PF &xe_device @@ -117,6 +122,10 @@ int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) * When changing from AUTO to CUSTOM mode, any already allocated VFs resources * will remain allocated and will not be released upon VFs disabling. * + * When changing back to AUTO mode, if VFs are not enabled, already allocated + * VFs resources will be immediately released. If VFs are still enabled, such + * mode change is rejected. + * * This function can only be called on PF. * * Return: 0 on success or a negative error code on failure. @@ -128,6 +137,15 @@ int xe_sriov_pf_provision_set_mode(struct xe_device *xe, enum xe_sriov_provision if (mode == xe->sriov.pf.provision.mode) return 0; + if (mode == XE_SRIOV_PROVISIONING_MODE_AUTO) { + if (xe_sriov_pf_num_vfs(xe)) { + xe_sriov_dbg(xe, "can't restore %s: VFs must be disabled!\n", + mode_to_string(mode)); + return -EBUSY; + } + pf_unprovision_all_vfs(xe); + } + xe_sriov_dbg(xe, "mode %s changed to %s by %ps\n", mode_to_string(xe->sriov.pf.provision.mode), mode_to_string(mode), __builtin_return_address(0)); -- cgit v1.2.3 From e9ad390a4812fd60c1da46823f7a6f84f2411f0c Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 7 Oct 2025 12:26:00 +0200 Subject: arm64/sysreg: Fix GIC CDEOI instruction encoding The GIC CDEOI system instruction requires the Rt field to be set to 0b11111, otherwise the instruction behaviour becomes CONSTRAINED UNPREDICTABLE. Currently, its usage is encoded as a system register write, with a constant 0 value: write_sysreg_s(0, GICV5_OP_GIC_CDEOI) When compiling with GCC, the 0 constant value, through these asm constraints and modifiers ('x' modifier and 'Z' constraint combo): asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); forces the compiler to issue the XZR register for the MSR operation (i.e. the one that corresponds to Rt == 0b11111), issuing the right instruction encoding. Unfortunately, LLVM does not yet understand that modifier/constraint combo, so it ends up issuing a register other than XZR for the MSR source, which in turn means that it encodes the GIC CDEOI instruction wrongly and the instruction behaviour becomes CONSTRAINED UNPREDICTABLE, which we must prevent. Add a conditional to the write_sysreg_s() macro that detects whether it is passed a constant 0 value and, if so, issues an MSR write with XZR as the source register - explicitly doing what the asm modifier/constraint combo is meant to achieve, fixing the LLVM compilation issue.
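For illustration (not part of the patch), the fixed macro expands along these lines for the two cases:

	/* constant zero: XZR is encoded explicitly, i.e. Rt == 0b11111 */
	write_sysreg_s(0, GICV5_OP_GIC_CDEOI);
	/* -> asm volatile(__msr_s(GICV5_OP_GIC_CDEOI, "xzr")); */

	/* non-constant value: falls back to a plain "r" operand */
	write_sysreg_s(val, SYS_FOO_EL1);	/* SYS_FOO_EL1 is a made-up example */
	/* -> asm volatile(__msr_s(SYS_FOO_EL1, "%x0") : : "r" (val)); */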
Fixes: 7ec80fb3f025 ("irqchip/gic-v5: Add GICv5 PPI support") Suggested-by: Catalin Marinas Signed-off-by: Lorenzo Pieralisi Acked-by: Marc Zyngier Cc: stable@vger.kernel.org Cc: Sascha Bischoff Cc: Will Deacon Cc: Mark Rutland Cc: Marc Zyngier Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6455db1b54fd..c231d2a3e515 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1220,10 +1220,19 @@ __val; \ }) +/* + * The "Z" constraint combined with the "%x0" template should be enough + * to force XZR generation if (v) is a constant 0 value but LLVM does not + * yet understand that modifier/constraint combo so a conditional is required + * to nudge the compiler into using XZR as a source for a 0 constant value. + */ #define write_sysreg_s(v, r) do { \ u64 __val = (u64)(v); \ u32 __maybe_unused __check_r = (u32)(r); \ - asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ + if (__builtin_constant_p(__val) && __val == 0) \ + asm volatile(__msr_s(r, "xzr")); \ + else \ + asm volatile(__msr_s(r, "%x0") : : "r" (__val)); \ } while (0) /* -- cgit v1.2.3 From ea0d55ae4b3207c33691a73da3443b1fd379f1d2 Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Tue, 14 Oct 2025 10:25:36 +0100 Subject: arm64: debug: always unmask interrupts in el0_softstp() We intend that EL0 exception handlers unmask all DAIF exceptions before calling exit_to_user_mode(). When completing single-step of a suspended breakpoint, we do not call local_daif_restore(DAIF_PROCCTX) before calling exit_to_user_mode(), leaving all DAIF exceptions masked. When pseudo-NMIs are not in use this is benign. When pseudo-NMIs are in use, this is unsound. At this point interrupts are masked by both DAIF.IF and PMR_EL1, and subsequent irq flag manipulation may not work correctly. For example, a subsequent local_irq_enable() within exit_to_user_mode_loop() will only unmask interrupts via PMR_EL1 (leaving those masked via DAIF.IF), and anything depending on interrupts being unmasked (e.g. delivery of signals) will not work correctly. This was detected by CONFIG_ARM64_DEBUG_PRIORITY_MASKING. Move the call to `try_step_suspended_breakpoints()` outside of the check so that interrupts can be unmasked even if we don't call the step handler. Fixes: 0ac7584c08ce ("arm64: debug: split single stepping exception entry") Cc: # 6.17 Signed-off-by: Ada Couprie Diaz Acked-by: Mark Rutland [catalin.marinas@arm.com: added Mark's rewritten commit log and some whitespace] Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry-common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index f546a914f041..a9c81715ce59 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -697,6 +697,8 @@ static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr) static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) { + bool step_done; + if (!is_ttbr0_addr(regs->pc)) arm64_apply_bp_hardening(); @@ -707,10 +709,10 @@ static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) * If we are stepping a suspended breakpoint there's nothing more to do: * the single-step is complete. 
*/ - if (!try_step_suspended_breakpoints(regs)) { - local_daif_restore(DAIF_PROCCTX); + step_done = try_step_suspended_breakpoints(regs); + local_daif_restore(DAIF_PROCCTX); + if (!step_done) do_el0_softstep(esr, regs); - } arm64_exit_to_user_mode(regs); } -- cgit v1.2.3 From 7c33e97a6ef5d84e98b892c3e00c6d1678d20395 Mon Sep 17 00:00:00 2001 From: Sahil Chandna Date: Wed, 15 Oct 2025 00:26:35 +0530 Subject: bpf: Do not disable preemption in bpf_test_run(). The timer mode is initialized to NO_PREEMPT mode by default, which disables preemption and forces execution in atomic context, causing an issue on PREEMPT_RT configurations when invoking spin_lock_bh() and leading to the following warning: BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 6107, name: syz.0.17 preempt_count: 1, expected: 0 RCU nest depth: 1, expected: 1 Preemption disabled at: [] bpf_test_timer_enter+0xf8/0x140 net/bpf/test_run.c:42 Fix this by removing the NO_PREEMPT/NO_MIGRATE mode check. Also, the test timer context no longer needs explicit calls to migrate_disable()/migrate_enable() with rcu_read_lock()/rcu_read_unlock(). Use the helpers rcu_read_lock_dont_migrate() and rcu_read_unlock_migrate() instead. Reported-by: syzbot+1f1fbecb9413cdbfbef8@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=1f1fbecb9413cdbfbef8 Suggested-by: Yonghong Song Suggested-by: Menglong Dong Acked-by: Yonghong Song Tested-by: syzbot+1f1fbecb9413cdbfbef8@syzkaller.appspotmail.com Co-developed-by: Brahmajit Das Signed-off-by: Brahmajit Das Signed-off-by: Sahil Chandna Link: https://lore.kernel.org/r/20251014185635.10300-1-chandna.sahil@gmail.com Signed-off-by: Alexei Starovoitov --- net/bpf/test_run.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 1782e83de2cb..8b7d0b90fea7 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -29,7 +29,6 @@ #include struct bpf_test_timer { - enum { NO_PREEMPT, NO_MIGRATE } mode; u32 i; u64 time_start, time_spent; }; @@ -37,12 +36,7 @@ struct bpf_test_timer { static void bpf_test_timer_enter(struct bpf_test_timer *t) __acquires(rcu) { - rcu_read_lock(); - if (t->mode == NO_PREEMPT) - preempt_disable(); - else - migrate_disable(); - + rcu_read_lock_dont_migrate(); t->time_start = ktime_get_ns(); } @@ -50,12 +44,7 @@ static void bpf_test_timer_leave(struct bpf_test_timer *t) __releases(rcu) { t->time_start = 0; - - if (t->mode == NO_PREEMPT) - preempt_enable(); - else - migrate_enable(); - rcu_read_unlock(); + rcu_read_unlock_migrate(); } static bool bpf_test_timer_continue(struct bpf_test_timer *t, int iterations, @@ -374,7 +363,7 @@ static int bpf_test_run_xdp_live(struct bpf_prog *prog, struct xdp_buff *ctx, { struct xdp_test_data xdp = { .batch_size = batch_size }; - struct bpf_test_timer t = { .mode = NO_MIGRATE }; + struct bpf_test_timer t = {}; int ret; if (!repeat) @@ -404,7 +393,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, struct bpf_prog_array_item item = {.prog = prog}; struct bpf_run_ctx *old_ctx; struct bpf_cg_run_ctx run_ctx; - struct bpf_test_timer t = { NO_MIGRATE }; + struct bpf_test_timer t = {}; enum bpf_cgroup_storage_type stype; int ret; @@ -1377,7 +1366,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { - struct bpf_test_timer t = { NO_PREEMPT }; + struct bpf_test_timer t = {}; u32 size =
kattr->test.data_size_in; struct bpf_flow_dissector ctx = {}; u32 repeat = kattr->test.repeat; @@ -1445,7 +1434,7 @@ out: int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { - struct bpf_test_timer t = { NO_PREEMPT }; + struct bpf_test_timer t = {}; struct bpf_prog_array *progs = NULL; struct bpf_sk_lookup_kern ctx = {}; u32 repeat = kattr->test.repeat; -- cgit v1.2.3 From a1e83d4c0361f4b0e3b7ef8b603bf5e5ef60af86 Mon Sep 17 00:00:00 2001 From: Brahmajit Das Date: Fri, 17 Oct 2025 22:45:51 +0530 Subject: selftests/bpf: Fix redefinition of 'off' as different kind of symbol This fixes the following build error CLNG-BPF [test_progs] verifier_global_ptr_args.bpf.o progs/verifier_global_ptr_args.c:228:5: error: redefinition of 'off' as different kind of symbol 228 | u32 off; | ^ The symbol 'off' was previously defined in tools/testing/selftests/bpf/tools/include/vmlinux.h, which includes an enum i40e_ptp_gpio_pin_state from drivers/net/ethernet/intel/i40e/i40e_ptp.c: enum i40e_ptp_gpio_pin_state { end = -2, invalid = -1, off = 0, in_A = 1, in_B = 2, out_A = 3, out_B = 4, }; This enum is included when CONFIG_I40E is enabled. As of commit 032676ff8217 ("LoongArch: Update Loongson-3 default config file"), CONFIG_I40E is set in the defconfig, which leads to the conflict. Renaming the local variable avoids the redefinition and allows the build to succeed. Suggested-by: Yonghong Song Signed-off-by: Brahmajit Das Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20251017171551.53142-1-listout@listout.xyz Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/verifier_global_ptr_args.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c index 6630a92b1b47..1204fbc58178 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -225,7 +225,7 @@ int trusted_to_untrusted(void *ctx) } char mem[16]; -u32 off; +u32 offset; SEC("tp_btf/sys_enter") __success @@ -240,9 +240,9 @@ int anything_to_untrusted(void *ctx) /* scalar to untrusted */ subprog_untrusted(0); /* variable offset to untrusted (map) */ - subprog_untrusted((void *)mem + off); + subprog_untrusted((void *)mem + offset); /* variable offset to untrusted (trusted) */ - subprog_untrusted((void *)bpf_get_current_task_btf() + off); + subprog_untrusted((void *)bpf_get_current_task_btf() + offset); return 0; } @@ -298,12 +298,12 @@ int anything_to_untrusted_mem(void *ctx) /* scalar to untrusted mem */ subprog_void_untrusted(0); /* variable offset to untrusted mem (map) */ - subprog_void_untrusted((void *)mem + off); + subprog_void_untrusted((void *)mem + offset); /* variable offset to untrusted mem (trusted) */ - subprog_void_untrusted(bpf_get_current_task_btf() + off); + subprog_void_untrusted(bpf_get_current_task_btf() + offset); /* variable offset to untrusted char/enum (map) */ - subprog_char_untrusted(mem + off); - subprog_enum_untrusted((void *)mem + off); + subprog_char_untrusted(mem + offset); + subprog_enum_untrusted((void *)mem + offset); return 0; } -- cgit v1.2.3 From 4fde66699f1ced2eeb6e58ff0eaf46e4fc92a7a0 Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Thu, 16 Oct 2025 19:26:20 -0700 Subject: drm/xe/xe3: Add support for graphics IP versions 30.04 & 30.05 Add graphics IP versions 30.04 & 30.05 and initial workarounds for 
these IP versions. BSpec: 74201 Signed-off-by: Shekhar Chauhan Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251016-xe3p-v3-1-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 2 ++ drivers/gpu/drm/xe/xe_wa.c | 6 +++--- drivers/gpu/drm/xe/xe_wa_oob.rules | 9 +++++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 0f1a06b62e03..9744c572b1bd 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -136,6 +136,8 @@ static const struct xe_ip graphics_ips[] = { { 3000, "Xe3_LPG", &graphics_xe2 }, { 3001, "Xe3_LPG", &graphics_xe2 }, { 3003, "Xe3_LPG", &graphics_xe2 }, + { 3004, "Xe3_LPG", &graphics_xe2 }, + { 3005, "Xe3_LPG", &graphics_xe2 }, }; /* Pre-GMDID Media IPs */ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index aa1b69f48f6f..b6dcd9827354 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -684,7 +684,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { }, { XE_RTP_NAME("13012615864"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), OR, - GRAPHICS_VERSION(3003), + GRAPHICS_VERSION_RANGE(3003, 3005), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) }, @@ -695,7 +695,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, { XE_RTP_NAME("14021402888"), - XE_RTP_RULES(GRAPHICS_VERSION(3003), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3003, 3005), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, { XE_RTP_NAME("18041344222"), @@ -913,7 +913,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_AUTOSTRIP)) }, { XE_RTP_NAME("22021007897"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3003), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) }, }; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 4bb94e5799ed..fb38eb3d6e9a 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -33,14 +33,15 @@ 13011645652 GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) GRAPHICS_VERSION(3003) + GRAPHICS_VERSION_RANGE(3004, 3005) 14022293748 GRAPHICS_VERSION_RANGE(2001, 2002) GRAPHICS_VERSION(2004) - GRAPHICS_VERSION_RANGE(3000, 3001) - GRAPHICS_VERSION(3003) + GRAPHICS_VERSION_RANGE(3000, 3005) 22019794406 GRAPHICS_VERSION_RANGE(2001, 2002) GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) GRAPHICS_VERSION(3003) + GRAPHICS_VERSION_RANGE(3004, 3005) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) @@ -61,11 +62,11 @@ 16023105232 GRAPHICS_VERSION_RANGE(2001, 3001) MEDIA_VERSION_RANGE(1301, 3000) MEDIA_VERSION(3002) - GRAPHICS_VERSION(3003) + GRAPHICS_VERSION_RANGE(3003, 3005) 16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) MEDIA_VERSION_RANGE(1300, 3000) MEDIA_VERSION(3002) - GRAPHICS_VERSION(3003) + GRAPHICS_VERSION_RANGE(3003, 3005) 14020001231 GRAPHICS_VERSION_RANGE(2001,2004), FUNC(xe_rtp_match_psmi_enabled) MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled) MEDIA_VERSION(3000), FUNC(xe_rtp_match_psmi_enabled) -- cgit v1.2.3 From fdce3e20dae82d529e9dbaf3dea155301bc7a4d7 Mon Sep 17 
00:00:00 2001 From: Shekhar Chauhan Date: Thu, 16 Oct 2025 19:26:21 -0700 Subject: drm/xe/xe3p: Add support for media IP versions 35.00 & 35.03 Xe3p_LPM/Xe3p_HPM are very similar to Xe3_LPM on the kmd interface, so they can use the same descriptor structure. Add both 35.00 and 35.03 IP versions. BSpec: 74201, 74202, 77977, 77979 Cc: Balasubramani Vivekanandan Signed-off-by: Shekhar Chauhan Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251016-xe3p-v3-2-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 9744c572b1bd..f3ab19d3ee91 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -151,6 +151,8 @@ static const struct xe_ip media_ips[] = { { 2000, "Xe2_LPM", &media_xelpmp }, { 3000, "Xe3_LPM", &media_xelpmp }, { 3002, "Xe3_LPM", &media_xelpmp }, + { 3500, "Xe3p_LPM", &media_xelpmp }, + { 3503, "Xe3p_HPM", &media_xelpmp }, }; static const struct xe_device_desc tgl_desc = { -- cgit v1.2.3 From 26f368949e4ec5767ca2479fdba4d0deb5429958 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Thu, 16 Oct 2025 19:26:22 -0700 Subject: drm/xe: Drop CTC_MODE register read The warning was added for a condition that never triggered even for platforms prior to Xe2. It's not supported in Xe2, and in Xe3p the register is removed from the main GT. Just drop the entire function as it doesn't bring any benefit. Bspec: 62395 Signed-off-by: Balasubramani Vivekanandan [ Drop the entire check for CTC_MODE ] Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251016-xe3p-v3-3-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 4 ---- drivers/gpu/drm/xe/xe_gt_clock.c | 19 ------------------- 2 files changed, 23 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 51f2a03847f9..21c15441c453 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -346,10 +346,6 @@ #define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n)) #define VDN_MFXVDENC_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n)) -#define CTC_MODE XE_REG(0xa26c) -#define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) -#define CTC_SOURCE_DIVIDE_LOGIC REG_BIT(0) - #define FORCEWAKE_RENDER XE_REG(0xa278) #define POWERGATE_DOMAIN_STATUS XE_REG(0xa2a0) diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 4f011d1573c6..00f5972c14dc 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -55,30 +55,11 @@ static void read_crystal_clock(struct xe_gt *gt, u32 rpm_config_reg, u32 *freq, } } -static void check_ctc_mode(struct xe_gt *gt) -{ - /* - * CTC_MODE[0] = 1 is definitely not supported for Xe2 and later - * platforms. In theory it could be a valid setting for pre-Xe2 - * platforms, but there's no documentation on how to properly handle - * this case. Reading TIMESTAMP_OVERRIDE, as the driver attempted in - * the past has been confirmed as incorrect by the hardware architects. - * - * For now just warn if we ever encounter hardware in the wild that - * has this setting and move on as if it hadn't been set.
- */ - if (xe_mmio_read32(>->mmio, CTC_MODE) & CTC_SOURCE_DIVIDE_LOGIC) - xe_gt_warn(gt, "CTC_MODE[0] is set; this is unexpected and undocumented\n"); -} - int xe_gt_clock_init(struct xe_gt *gt) { u32 freq; u32 c0; - if (!IS_SRIOV_VF(gt_to_xe(gt))) - check_ctc_mode(gt); - c0 = xe_mmio_read32(>->mmio, RPM_CONFIG0); read_crystal_clock(gt, c0, &freq, >->info.timestamp_base); -- cgit v1.2.3 From 4ad05339c5ec3c85d0b1fc124d22791b57d2a517 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 16 Oct 2025 19:26:23 -0700 Subject: drm/xe: Add GT_VER() to check version specific to gt type In some situations we will need to check the version of the specific gt being passed as argument, not if the device has a certain graphics/media version. This is extracted from a patch by Balasubramani Vivekanandan that may need some rework, but this helper is still useful for other enabling parts of Xe3p. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251016-xe3p-v3-4-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 5df2ffe3ff83..9d710049da45 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -22,6 +22,12 @@ #define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0) +#define GT_VER(gt) ({ \ + typeof(gt) gt_ = (gt); \ + struct xe_device *xe = gt_to_xe(gt_); \ + xe_gt_is_media_type(gt_) ? MEDIA_VER(xe) : GRAPHICS_VER(xe); \ +}) + extern struct fault_attr gt_reset_failure; static inline bool xe_fault_inject_gt_reset(void) { -- cgit v1.2.3 From 6f2aa1493d54f9966dc06772238fe5b7cb7aa267 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Thu, 16 Oct 2025 19:26:24 -0700 Subject: drm/xe/xe3p_lpm: Skip disabling NOA on unsupported IPs IP version 35 has removed "NOA Enable Signal" bit from RPM_CONFIG1 register. Skip clearing that bit on unsupported IPs. Bspec: 62391 Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251016-xe3p-v3-5-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_oa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a4894eb0d7f3..f901ba52b403 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -837,7 +837,8 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) xe_oa_configure_oa_context(stream, false); /* Make sure we disable noa to save power. */ - xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0); + if (GT_VER(stream->gt) < 35) + xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0); sqcnt1 = SQCNT1_PMON_ENABLE | (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); -- cgit v1.2.3 From 1553d6c58870476f29ec0bf43f264094553d1407 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 16 Oct 2025 19:26:25 -0700 Subject: drm/xe/xe3p_lpm: Handle MCR steering Xe3p_LPM's MCR steering has the same ranges and behavior as Xe3_LPM. However one register range that was reserved on Xe3_LPM has now become a unicast range (0x384200-0x38427F), so we need to stop consolidating the adjacent MCR ranges into a single table entry in the table. With this change to the Xe3_LPM table, we can continue to use the same table for both IP families. 
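Illustration with the values from the diff below: the formerly consolidated entry splits in two so that the new unicast range falls into the resulting hole and is never steered through the instance-0 table:

	{ 0x384000, 0x3847DF },	/* old: GAM, rsvd, GAM */

becomes

	{ 0x384000, 0x3841FF },	/* GAM */
	/* 0x384200 - 0x38427F is now unicast and must stay unlisted */
	{ 0x384400, 0x3847DF },	/* GAM */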
While we're touching this table, take the opportunity to fix a whitespace mistake and clarify that one of the other consolidated range entries includes a reserved range. Bspec: 76445 Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://lore.kernel.org/r/20251016-xe3p-v3-6-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 8fb1cae91724..e1a2b38fc2a8 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -236,12 +236,13 @@ static const struct xe_mmio_range xe2lpm_instance0_steering_table[] = { }; static const struct xe_mmio_range xe3lpm_instance0_steering_table[] = { - { 0x384000, 0x3847DF }, /* GAM, rsvd, GAM */ + { 0x384000, 0x3841FF }, /* GAM */ + { 0x384400, 0x3847DF }, /* GAM */ { 0x384900, 0x384AFF }, /* GAM */ { 0x389560, 0x3895FF }, /* MEDIAINF */ { 0x38B600, 0x38B8FF }, /* L3BANK */ { 0x38C800, 0x38D07F }, /* GAM, MEDIAINF */ - { 0x38D0D0, 0x38F0FF }, /* MEDIAINF, GAM */ + { 0x38D0D0, 0x38F0FF }, /* MEDIAINF, rsvd, GAM */ { 0x393C00, 0x393C7F }, /* MEDIAINF */ {}, }; -- cgit v1.2.3 From f4e9acaa5dd58ea4635a2c4a84d188e2a374b807 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 16 Oct 2025 19:26:26 -0700 Subject: drm/xe/xe3p: Stop programming RCU_MODE's fixed slice mode setting Since the hardware load balancing is no longer supported, the programming in RCU_MODE is no longer necessary. Bspec: 60382 Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://lore.kernel.org/r/20251016-xe3p-v3-7-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index cba4375525c7..b08a6d42c8ff 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -350,6 +350,13 @@ static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_device *xe, const struct xe_gt *gt, const struct xe_hw_engine *hwe) { + /* + * Xe3p no longer supports load balance mode, so "fixed cslice" mode + * is automatic and no RCU_MODE programming is required. + */ + if (GRAPHICS_VER(gt_to_xe(gt)) >= 35) + return false; + return xe_gt_ccs_mode_enabled(gt) && xe_rtp_match_first_render_or_compute(xe, gt, hwe); } -- cgit v1.2.3 From c3d318b7f605a74086474c9e7c6c9f2feca9a5db Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 16 Oct 2025 19:26:27 -0700 Subject: drm/xe/xe3p: Determine service copy availability from fuse Xe3p introduces a dedicated SERVICE_COPY_ENABLE fuse register to reflect the availability of the service copy engines (BCS1-BCS8). 
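To illustrate the pre-Xe3p fallback that the hunk below implements: each of the four meml3 fuse bits gates a pair of service copy engines, so a reading of 0b0101 expands to 0b00110011, i.e. BCS1/BCS2 and BCS5/BCS6 present. A minimal sketch of that expansion (the function name here is made up for illustration):

static u32 example_meml3_to_svccopy(u32 meml3)
{
	u32 mask = 0;

	/* e.g. meml3 = 0b0101 -> mask = 0b00110011 */
	for (int i = 0; i < 4; i++)
		if (meml3 & BIT(i))
			mask |= 0x3 << (2 * i);	/* one fuse bit enables two copy engines */

	return mask;
}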
Bspec: 74624 Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20251016-xe3p-v3-8-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +++ drivers/gpu/drm/xe/xe_hw_engine.c | 43 ++++++++++++++++++++++++++++-------- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 21c15441c453..228de47c0f3f 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -239,6 +239,9 @@ #define XE2_GT_GEOMETRY_DSS_1 XE_REG(0x9150) #define XE2_GT_GEOMETRY_DSS_2 XE_REG(0x9154) +#define SERVICE_COPY_ENABLE XE_REG(0x9170) +#define FUSE_SERVICE_COPY_ENABLE_MASK REG_GENMASK(7, 0) + #define GDRST XE_REG(0x941c) #define GRDOM_GUC REG_BIT(3) #define GRDOM_FULL REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b08a6d42c8ff..073ecd263e54 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -718,27 +718,52 @@ static void read_media_fuses(struct xe_gt *gt) } } +static u32 infer_svccopy_from_meml3(struct xe_gt *gt) +{ + u32 meml3 = REG_FIELD_GET(MEML3_EN_MASK, + xe_mmio_read32(&gt->mmio, MIRROR_FUSE3)); + u32 svccopy_mask = 0; + + /* + * Each of the four meml3 bits determines the fusing of two service + * copy engines. + */ + for (int i = 0; i < 4; i++) + svccopy_mask |= (meml3 & BIT(i)) ? 0b11 << 2 * i : 0; + + return svccopy_mask; +} + +static u32 read_svccopy_fuses(struct xe_gt *gt) +{ + return REG_FIELD_GET(FUSE_SERVICE_COPY_ENABLE_MASK, + xe_mmio_read32(&gt->mmio, SERVICE_COPY_ENABLE)); +} + static void read_copy_fuses(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); u32 bcs_mask; - if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270) - return; - xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - bcs_mask = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3); - bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask); + if (GRAPHICS_VER(xe) >= 35) + bcs_mask = read_svccopy_fuses(gt); + else if (GRAPHICS_VERx100(xe) == 1260) + bcs_mask = infer_svccopy_from_meml3(gt); + else + return; - /* BCS0 is always present; only BCS1-BCS8 may be fused off */ - for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) { + /* Only BCS1-BCS8 may be fused off */ + bcs_mask <<= XE_HW_ENGINE_BCS1; + for (int i = XE_HW_ENGINE_BCS1; i <= XE_HW_ENGINE_BCS8; ++i) { if (!(gt->info.engine_mask & BIT(i))) continue; - if (!(BIT(j / 2) & bcs_mask)) { + if (!(bcs_mask & BIT(i))) { gt->info.engine_mask &= ~BIT(i); - xe_gt_info(gt, "bcs%u fused off\n", j); + xe_gt_info(gt, "bcs%u fused off\n", + i - XE_HW_ENGINE_BCS0); } } } -- cgit v1.2.3 From ccccbc53bd6ea7d407eac5b7a6b37da8c73fbb88 Mon Sep 17 00:00:00 2001 From: Wang Xin Date: Thu, 16 Oct 2025 19:26:28 -0700 Subject: drm/xe: Dump CURRENT_LRCA register Add CURRENT_LRCA to register dump to help with debugging.
Cc: Niranjana Vishwanathapura Signed-off-by: Wang Xin Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251016-xe3p-v3-9-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 ++ drivers/gpu/drm/xe/xe_guc_capture.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index f4c3e1187a00..0c02d0fe5531 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -141,6 +141,8 @@ #define INHIBIT_SWITCH_UNTIL_PREEMPTED REG_BIT(31) #define IDLE_DELAY REG_GENMASK(20, 0) +#define RING_CURRENT_LRCA(base) XE_REG((base) + 0x240) + #define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) #define CTX_CTRL_PXP_ENABLE REG_BIT(10) #define CTX_CTRL_OAC_CONTEXT_ENABLE REG_BIT(8) diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 243dad3e2418..8d1bfa2cdb15 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -122,6 +122,7 @@ struct __guc_capture_parsed_output { { RING_IPEHR(0), REG_32BIT, 0, 0, 0, "IPEHR"}, \ { RING_INSTDONE(0), REG_32BIT, 0, 0, 0, "RING_INSTDONE"}, \ { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, 0, "INDIRECT_RING_STATE"}, \ + { RING_CURRENT_LRCA(0), REG_32BIT, 0, 0, 0, "CURRENT_LRCA"}, \ { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "ACTHD"}, \ { RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ -- cgit v1.2.3 From 7626cec652bd03aa302bd7a0fd87621ffed8cc5f Mon Sep 17 00:00:00 2001 From: Wang Xin Date: Thu, 16 Oct 2025 19:26:29 -0700 Subject: drm/xe/xe3p: Dump CSMQDEBUG register The CSMQDEBUG register is useful for the development of the MQ feature. Start dumping the debug register.
Cc: Niranjana Vishwanathapura Cc: Matt Roper Signed-off-by: Wang Xin Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251016-xe3p-v3-10-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 ++ drivers/gpu/drm/xe/xe_guc_capture.c | 28 +++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 0c02d0fe5531..68172b0248a6 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -155,6 +155,8 @@ #define GFX_DISABLE_LEGACY_MODE REG_BIT(3) #define GFX_MSIX_INTERRUPT_ENABLE REG_BIT(13) +#define RING_CSMQDEBUG(base) XE_REG((base) + 0x2b0) + #define RING_TIMESTAMP(base) XE_REG((base) + 0x358) #define RING_TIMESTAMP_UDW(base) XE_REG((base) + 0x358 + 4) diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 8d1bfa2cdb15..0c1fbe97b8bf 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -150,6 +150,9 @@ struct __guc_capture_parsed_output { { SFC_DONE(2), 0, 0, 0, 0, "SFC_DONE[2]"}, \ { SFC_DONE(3), 0, 0, 0, 0, "SFC_DONE[3]"} +#define XE3P_BASE_ENGINE_INSTANCE \ + { RING_CSMQDEBUG(0), REG_32BIT, 0, 0, 0, "CSMQDEBUG"} + /* XE_LP Global */ static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = { COMMON_XELP_BASE_GLOBAL, @@ -196,6 +199,12 @@ static const struct __guc_mmio_reg_descr xe_lp_gsc_inst_regs[] = { COMMON_BASE_ENGINE_INSTANCE, }; +/* Render / Compute Per-Engine-Instance */ +static const struct __guc_mmio_reg_descr xe3p_rc_inst_regs[] = { + COMMON_BASE_ENGINE_INSTANCE, + XE3P_BASE_ENGINE_INSTANCE, +}; + /* * Empty list to prevent warnings about unknown class/instance types * as not all class/instance types have entries on all platforms. 
@@ -246,6 +255,21 @@ static const struct __guc_mmio_reg_descr_group xe_hpg_lists[] = { {} }; + /* List of lists for Xe3p and beyond */ +static const struct __guc_mmio_reg_descr_group xe3p_lists[] = { + MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0), + MAKE_REGLIST(xe_hpg_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE), + MAKE_REGLIST(xe3p_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO), + MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO), + MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE), + MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER), + MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER), + MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER), + {} +}; static const char * const capture_list_type_names[] = { "Global", "Class", @@ -293,7 +317,9 @@ guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc, static const struct __guc_mmio_reg_descr_group * guc_capture_get_device_reglist(struct xe_device *xe) { - if (GRAPHICS_VERx100(xe) >= 1255) + if (GRAPHICS_VER(xe) >= 35) + return xe3p_lists; + else if (GRAPHICS_VERx100(xe) >= 1255) return xe_hpg_lists; else return xe_lp_lists; -- cgit v1.2.3 From c002b1764e7b0ffd181f32d5103d9d0ac283beeb Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 16 Oct 2025 19:26:30 -0700 Subject: drm/xe/nvl: Define NVL-S platform Provide the basic platform definitions and PCI IDs for NVL-S. 
Signed-off-by: Matt Roper Reviewed-by: Shekhar Chauhan Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20251016-xe3p-v3-11-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 12 ++++++++++++ drivers/gpu/drm/xe/xe_platform_types.h | 1 + include/drm/intel/pciids.h | 9 +++++++++ 3 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f3ab19d3ee91..6dc5124967a1 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -377,6 +377,17 @@ static const struct xe_device_desc ptl_desc = { .vm_max_level = 4, }; +static const struct xe_device_desc nvls_desc = { + PLATFORM(NOVALAKE_S), + .dma_mask_size = 46, + .has_display = true, + .has_flat_ccs = 1, + .max_gt_per_tile = 2, + .require_force_probe = true, + .va_bits = 48, + .vm_max_level = 4, +}; + #undef PLATFORM __diag_pop(); @@ -403,6 +414,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), INTEL_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc), INTEL_PTL_IDS(INTEL_VGA_DEVICE, &ptl_desc), + INTEL_NVLS_IDS(INTEL_VGA_DEVICE, &nvls_desc), { } }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index 3e332214c7bb..78286285c249 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -24,6 +24,7 @@ enum xe_platform { XE_LUNARLAKE, XE_BATTLEMAGE, XE_PANTHERLAKE, + XE_NOVALAKE_S, }; enum xe_subplatform { diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 0cd12616d621..9f095a99d6c9 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -884,4 +884,13 @@ MACRO__(0xFD80, ## __VA_ARGS__), \ MACRO__(0xFD81, ## __VA_ARGS__) +/* NVL-S */ +#define INTEL_NVLS_IDS(MACRO__, ...) \ + MACRO__(0xD740, ## __VA_ARGS__), \ + MACRO__(0xD741, ## __VA_ARGS__), \ + MACRO__(0xD742, ## __VA_ARGS__), \ + MACRO__(0xD743, ## __VA_ARGS__), \ + MACRO__(0xD744, ## __VA_ARGS__), \ + MACRO__(0xD745, ## __VA_ARGS__) + #endif /* __PCIIDS_H__ */ -- cgit v1.2.3 From dbfdaeb381a49a7bc753d18e2876bc56a15e01cc Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Sat, 18 Oct 2025 14:25:18 +0300 Subject: tpm_crb: Add idle support for the Arm FF-A start method According to the CRB over FF-A specification [1], a TPM that implements the ABI must comply with the TCG PTP specification. This requires support for the Idle and Ready states. This patch implements CRB control area requests for goIdle and cmdReady on FF-A based TPMs. The FF-A message used to notify the TPM of CRB updates includes a locality parameter, which provides a hint to the TPM about which locality modified the CRB. This patch adds a locality parameter to __crb_go_idle() and __crb_cmd_ready() to support this. 
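Condensed, the cmdReady path the patch implements looks roughly like the sketch below (names taken from the diff that follows; the existing doorbell and wait-for-ready steps are trimmed, and the function name is illustrative only):

static int example_cmd_ready(struct device *dev, struct crb_priv *priv, int loc)
{
	/* Request the Ready state through the CRB control area... */
	iowrite32(CRB_CTRL_REQ_CMD_READY, &priv->regs_t->ctrl_req);

	/* ...and, for FF-A TPMs, notify the TPM, hinting which locality wrote the CRB. */
	if (priv->sm == ACPI_TPM2_CRB_WITH_ARM_FFA)
		return tpm_crb_ffa_start(CRB_FFA_START_TYPE_COMMAND, loc);

	return 0;
}

The goIdle path is symmetrical, with CRB_CTRL_REQ_GO_IDLE in place of CRB_CTRL_REQ_CMD_READY.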
[1] https://developer.arm.com/documentation/den0138/latest/ Signed-off-by: Stuart Yoder Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm_crb.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index ed97344f2324..c75a531cfb98 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -133,8 +133,7 @@ static inline bool tpm_crb_has_idle(u32 start_method) { return !(start_method == ACPI_TPM2_START_METHOD || start_method == ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD || - start_method == ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC || - start_method == ACPI_TPM2_CRB_WITH_ARM_FFA); + start_method == ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC); } static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value, @@ -191,7 +190,7 @@ static int crb_try_pluton_doorbell(struct crb_priv *priv, bool wait_for_complete * * Return: 0 always */ -static int __crb_go_idle(struct device *dev, struct crb_priv *priv) +static int __crb_go_idle(struct device *dev, struct crb_priv *priv, int loc) { int rc; @@ -200,6 +199,12 @@ static int __crb_go_idle(struct device *dev, struct crb_priv *priv) iowrite32(CRB_CTRL_REQ_GO_IDLE, &priv->regs_t->ctrl_req); + if (priv->sm == ACPI_TPM2_CRB_WITH_ARM_FFA) { + rc = tpm_crb_ffa_start(CRB_FFA_START_TYPE_COMMAND, loc); + if (rc) + return rc; + } + rc = crb_try_pluton_doorbell(priv, true); if (rc) return rc; @@ -220,7 +225,7 @@ static int crb_go_idle(struct tpm_chip *chip) struct device *dev = &chip->dev; struct crb_priv *priv = dev_get_drvdata(dev); - return __crb_go_idle(dev, priv); + return __crb_go_idle(dev, priv, chip->locality); } /** @@ -238,7 +243,7 @@ static int crb_go_idle(struct tpm_chip *chip) * * Return: 0 on success -ETIME on timeout; */ -static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv) +static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv, int loc) { int rc; @@ -247,6 +252,12 @@ static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv) iowrite32(CRB_CTRL_REQ_CMD_READY, &priv->regs_t->ctrl_req); + if (priv->sm == ACPI_TPM2_CRB_WITH_ARM_FFA) { + rc = tpm_crb_ffa_start(CRB_FFA_START_TYPE_COMMAND, loc); + if (rc) + return rc; + } + rc = crb_try_pluton_doorbell(priv, true); if (rc) return rc; @@ -267,7 +278,7 @@ static int crb_cmd_ready(struct tpm_chip *chip) struct device *dev = &chip->dev; struct crb_priv *priv = dev_get_drvdata(dev); - return __crb_cmd_ready(dev, priv); + return __crb_cmd_ready(dev, priv, chip->locality); } static int __crb_request_locality(struct device *dev, @@ -444,7 +455,7 @@ static int crb_send(struct tpm_chip *chip, u8 *buf, size_t bufsiz, size_t len) /* Seems to be necessary for every command */ if (priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON) - __crb_cmd_ready(&chip->dev, priv); + __crb_cmd_ready(&chip->dev, priv, chip->locality); memcpy_toio(priv->cmd, buf, len); @@ -672,7 +683,7 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, * PTT HW bug w/a: wake up the device to access * possibly not retained registers. 
*/ - ret = __crb_cmd_ready(dev, priv); + ret = __crb_cmd_ready(dev, priv, 0); if (ret) goto out_relinquish_locality; @@ -744,7 +755,7 @@ out: if (!ret) priv->cmd_size = cmd_size; - __crb_go_idle(dev, priv); + __crb_go_idle(dev, priv, 0); out_relinquish_locality: -- cgit v1.2.3 From 60f148f6c2bba2a387533887388e077c93750aae Mon Sep 17 00:00:00 2001 From: Dnyaneshwar Bhadane Date: Thu, 16 Oct 2025 19:26:32 -0700 Subject: drm/xe/nvls: Attach MOCS table for NVL-S The MOCS table for NVL-S is the same as that of Xe2. Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Shekhar Chauhan Link: https://lore.kernel.org/r/20251016-xe3p-v3-13-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mocs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 7b68c22ff7bb..e8ec4114302e 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -576,6 +576,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, memset(info, 0, sizeof(struct xe_mocs_info)); switch (xe->info.platform) { + case XE_NOVALAKE_S: case XE_PANTHERLAKE: case XE_LUNARLAKE: case XE_BATTLEMAGE: -- cgit v1.2.3 From e320b8841ea4e1f1ab425e407040401ae55f061f Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Thu, 16 Oct 2025 19:26:33 -0700 Subject: drm/xe/xe3p_xpc: Add Xe3p_XPC IP definition Add support for graphics IP Xe3p_XPC having IP version 35.11. Bspec: 77979, 77975 Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matt Roper Reviewed-by: Shekhar Chauhan Link: https://lore.kernel.org/r/20251016-xe3p-v3-14-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 6dc5124967a1..c326430e75b5 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -106,6 +106,13 @@ static const struct xe_graphics_desc graphics_xe2 = { XE2_GFX_FEATURES, }; +static const struct xe_graphics_desc graphics_xe3p_xpc = { + XE2_GFX_FEATURES, + .hw_engine_mask = + GENMASK(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS1) | + GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0), +}; + static const struct xe_media_desc media_xem = { .hw_engine_mask = GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | @@ -138,6 +145,7 @@ static const struct xe_ip graphics_ips[] = { { 3003, "Xe3_LPG", &graphics_xe2 }, { 3004, "Xe3_LPG", &graphics_xe2 }, { 3005, "Xe3_LPG", &graphics_xe2 }, + { 3511, "Xe3p_XPC", &graphics_xe3p_xpc }, }; /* Pre-GMDID Media IPs */ -- cgit v1.2.3 From e82a97bf6ab173d12d0f1c2e7aa04394b879add3 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Thu, 16 Oct 2025 19:26:34 -0700 Subject: drm/xe/xe3p_xpc: Add L3 bank mask Expose L3 bank mask through topology query interface. In Xe3p_XPC, MIRROR_L3BANK_ENABLE represents the full L3 bank mask (not just a per-node mask), and each bit represents a single bank. With that there's no extra complexity to calculate the L3 bank mask like there was in previous platforms. 
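To make the simplification concrete (illustrative value only, not from the Bspec): a MIRROR_L3BANK_ENABLE readout of 0x0000ffff would directly yield banks 0-15 in the reported mask, with no per-node expansion step:

	u32 fuse_val = 0x0000ffff;	/* hypothetical fuse readout: banks 0-15 present */
	xe_l3_bank_mask_t l3_bank_mask = {};

	bitmap_from_arr32(l3_bank_mask, &fuse_val, 32);	/* sets bits 0-15 of the mask */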
Bspec: 73439 Signed-off-by: Fei Yang Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251016-xe3p-v3-15-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_topology.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 80ef3a6e0a3b..1e0516ba7422 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -148,7 +148,11 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) if (!xe_gt_topology_report_l3(gt)) return; - if (GRAPHICS_VER(xe) >= 30) { + if (GRAPHICS_VER(xe) >= 35) { + u32 fuse_val = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE); + + bitmap_from_arr32(l3_bank_mask, &fuse_val, 32); + } else if (GRAPHICS_VER(xe) >= 30) { xe_l3_bank_mask_t per_node = {}; u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3); u32 mirror_l3bank_enable = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE); -- cgit v1.2.3 From be614ea19dadfe5ab08c5e52e0491d0d81210b55 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 16 Oct 2025 19:26:35 -0700 Subject: drm/xe/xe3p_xpc: Add MCR steering Xe3p_XPC's steering has a few changes from Xe3. Aside from minor changes to the XeCore (the new name for what used to be "DSS") and INSTANCE0 tables, different rules apply to different subranges of type "GAM." Certain GAM subranges require steering to grp/instance (0,0) (and thus use the INSTANCE0 table), while others require special steering to (1,0) instead. Similarly, there are multiple classes of "PSMI" steering, with some requiring steering to (0,0) while others require (19,0). There are some ranges in Bspec missing the termination. Add a TODO comment so they can eventually be updated. Bspec: 74418 Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20251016-xe3p-v3-16-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 62 ++++++++++++++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_gt_types.h | 15 ++++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index e1a2b38fc2a8..81ecd9382635 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -169,6 +169,15 @@ static const struct xe_mmio_range xelpg_dss_steering_table[] = { {}, }; +static const struct xe_mmio_range xe3p_xpc_xecore_steering_table[] = { + { 0x008140, 0x00817F }, /* SLICE, XeCore, SLICE */ + { 0x009480, 0x00955F }, /* SLICE, XeCore */ + { 0x00D800, 0x00D87F }, /* SLICE */ + { 0x00DC00, 0x00E9FF }, /* SLICE, rsvd, XeCore, rsvd, XeCore, rsvd, XeCore */ + { 0x013000, 0x0135FF }, /* XeCore, SLICE */ + {}, +}; + static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = { { 0x393200, 0x39323F }, { 0x393400, 0x3934FF }, @@ -247,6 +256,30 @@ static const struct xe_mmio_range xe3lpm_instance0_steering_table[] = { {}, }; +/* + * Different "GAM" ranges have different rules; GAMWKRS, STLB, and GAMREQSTRM + * range subtypes need to be steered to (1,0), while all other GAM subtypes + * are steered to (0,0) and are included in the "INSTANCE0" table farther + * down. 
+ */ +static const struct xe_mmio_range xe3p_xpc_gam_grp1_steering_table[] = { + { 0x004000, 0x004AFF }, /* GAMREQSTRM, rsvd, STLB, GAMWKRS, GAMREQSTRM */ + { 0x00F100, 0x00FFFF }, /* GAMWKRS */ + {}, +}; + +static const struct xe_mmio_range xe3p_xpc_psmi_grp19_steering_table[] = { + { 0x00B500, 0x00B5FF }, + {}, +}; + +static const struct xe_mmio_range xe3p_xpc_instance0_steering_table[] = { + { 0x00B600, 0x00B6FF }, /* PSMI0 */ + { 0x00C800, 0x00CFFF }, /* GAMCTRL */ + { 0x00F000, 0x00F0FF }, /* GAMCTRL */ + {}, +}; + static void init_steering_l3bank(struct xe_gt *gt) { struct xe_mmio *mmio = &gt->mmio; @@ -419,6 +452,18 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt) gt->steering[SQIDI_PSMI].instance_target = select & 0x1; } +static void init_steering_psmi(struct xe_gt *gt) +{ + gt->steering[PSMI19].group_target = 19; + gt->steering[PSMI19].instance_target = 0; +} + +static void init_steering_gam1(struct xe_gt *gt) +{ + gt->steering[GAM1].group_target = 1; + gt->steering[GAM1].instance_target = 0; +} + static const struct { const char *name; void (*init)(struct xe_gt *gt); @@ -426,9 +471,11 @@ static const struct { [L3BANK] = { "L3BANK", init_steering_l3bank }, [MSLICE] = { "MSLICE", init_steering_mslice }, [LNCF] = { "LNCF", NULL }, /* initialized by mslice init */ - [DSS] = { "DSS", init_steering_dss }, + [DSS] = { "DSS / XeCore", init_steering_dss }, [OADDRM] = { "OADDRM / GPMXMT", init_steering_oaddrm }, [SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi }, + [PSMI19] = { "PSMI[19]", init_steering_psmi }, + [GAM1] = { "GAMWKRS / STLB / GAMREQSTRM", init_steering_gam1 }, [INSTANCE0] = { "INSTANCE 0", NULL }, [IMPLICIT_STEERING] = { "IMPLICIT", NULL }, }; @@ -467,7 +514,18 @@ void xe_gt_mcr_init_early(struct xe_gt *gt) gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table; } } else { - if (GRAPHICS_VER(xe) >= 20) { + if (GRAPHICS_VERx100(xe) == 3511) { + /* + * TODO: there are some ranges in bspec with missing + * termination: [0x00B000, 0x00B0FF] and + * [0x00D880, 0x00D8FF] (NODE); [0x00B100, 0x00B3FF] + * (L3BANK). Update them here once bspec is updated. + */ + gt->steering[DSS].ranges = xe3p_xpc_xecore_steering_table; + gt->steering[GAM1].ranges = xe3p_xpc_gam_grp1_steering_table; + gt->steering[INSTANCE0].ranges = xe3p_xpc_instance0_steering_table; + gt->steering[PSMI19].ranges = xe3p_xpc_psmi_grp19_steering_table; + } else if (GRAPHICS_VER(xe) >= 20) { gt->steering[DSS].ranges = xe2lpg_dss_steering_table; gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table; gt->steering[INSTANCE0].ranges = xe2lpg_instance0_steering_table; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 8b5f604d7883..d93faa1eedef 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -72,6 +72,21 @@ enum xe_steering_type { OADDRM, SQIDI_PSMI, + /* + * The bspec lists multiple ranges as "PSMI," but the different + * ranges with that label have different grpid steering values so we + * treat them independently in code. Note that the ranges with grpid=0 + * are included in the INSTANCE0 group above. + */ + PSMI19, + + /* + * Although most GAM ranges must be steered to (0,0) and thus use the + * INSTANCE0 type farther down, some platforms have special rules + * for specific subtypes that require steering to (1,0) instead. + */ + GAM1, + /* * On some platforms there are multiple types of MCR registers that * will always return a non-terminated value at instance (0, 0).
We'll -- cgit v1.2.3 From 90a5cf095a3298030df977f001a85eb3d3298513 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 16 Oct 2025 19:26:36 -0700 Subject: drm/xe/irq: Rename fuse mask variables It's confusing to refer to some masks as the interrupt masks and others as the fuse masks. Rename the fuse one to make it clearer. Note that the most important role they play here is that xe_hw_engine_mask_per_class() limits the engines not only according to the fuses, but also by what is architecturally available on the platform - the latter is the more important information for deciding which interrupts to enable. Add a comment about that. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251016-xe3p-v3-17-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_irq.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 838fb512b777..9c3a85c4585e 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -139,7 +139,6 @@ void xe_irq_enable_hwe(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); struct xe_mmio *mmio = &gt->mmio; - u32 ccs_mask, bcs_mask; u32 irqs, dmask, smask; u32 gsc_mask = 0; u32 heci_mask = 0; @@ -157,36 +156,43 @@ void xe_irq_enable_hwe(struct xe_gt *gt) GT_WAIT_SEMAPHORE_INTERRUPT; } - ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); - bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); - dmask = irqs << 16 | irqs; smask = irqs << 16; if (xe_gt_is_main_type(gt)) { + /* + * For enabling the interrupts, the information about fused off + * engines doesn't matter much, but this also allows to check if + * the engine is available architecturally in the platform + */ + u32 ccs_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); + u32 bcs_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); + /* Enable interrupts for each engine class */ xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask); - if (ccs_mask) + if (ccs_fuse_mask) xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, smask); /* Unmask interrupts for each engine instance */ xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~smask); xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~smask); - if (bcs_mask & (BIT(1)|BIT(2))) + if (bcs_fuse_mask & (BIT(1)|BIT(2))) xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask); - if (bcs_mask & (BIT(3)|BIT(4))) + if (bcs_fuse_mask & (BIT(3)|BIT(4))) xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask); - if (bcs_mask & (BIT(5)|BIT(6))) + if (bcs_fuse_mask & (BIT(5)|BIT(6))) xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask); - if (bcs_mask & (BIT(7)|BIT(8))) + if (bcs_fuse_mask & (BIT(7)|BIT(8))) xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask); - if (ccs_mask & (BIT(0)|BIT(1))) + if (ccs_fuse_mask & (BIT(0)|BIT(1))) xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~dmask); - if (ccs_mask & (BIT(2)|BIT(3))) + if (ccs_fuse_mask & (BIT(2)|BIT(3))) xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~dmask); } if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) { + u32 other_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER); + /* Enable interrupts for each engine class */ xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, dmask); @@ -199,7 +205,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) * the heci2 interrupt is enabled via the same register as the * GSCCS interrupts, but it has its own mask register.
*/ - if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) { + if (other_fuse_mask) { gsc_mask = irqs | GSC_ER_COMPLETE; heci_mask = GSC_IRQ_INTF(1); } else if (xe->info.has_heci_gscfi) { gsc_mask = GSC_IRQ_INTF(1); -- cgit v1.2.3 From 832bfaf873804d3c295977f13c01388aa97ad4ba Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 16 Oct 2025 19:26:37 -0700 Subject: drm/xe/irq: Split irq mask per engine class Each engine class has a different bitfield structure in the hw. We've just been using a common mask for all of them, but this means that we could inadvertently set a wrong bit in one class while enabling something in another. Split them to make it more future-proof. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251016-xe3p-v3-18-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_irq.c | 73 +++++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 9c3a85c4585e..142f422a5d97 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -139,25 +139,28 @@ void xe_irq_enable_hwe(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); struct xe_mmio *mmio = &gt->mmio; - u32 irqs, dmask, smask; - u32 gsc_mask = 0; - u32 heci_mask = 0; + u32 common_mask, val, gsc_mask = 0, heci_mask = 0, + rcs_mask = 0, bcs_mask = 0, vcs_mask = 0, vecs_mask = 0, + ccs_mask = 0; if (xe_device_uses_memirq(xe)) return; if (xe_device_uc_enabled(xe)) { - irqs = GT_RENDER_USER_INTERRUPT | - GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; + common_mask = GT_RENDER_USER_INTERRUPT | + GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; } else { - irqs = GT_RENDER_USER_INTERRUPT | - GT_CS_MASTER_ERROR_INTERRUPT | - GT_CONTEXT_SWITCH_INTERRUPT | - GT_WAIT_SEMAPHORE_INTERRUPT; + common_mask = GT_RENDER_USER_INTERRUPT | + GT_CS_MASTER_ERROR_INTERRUPT | + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; } - dmask = irqs << 16 | irqs; - smask = irqs << 16; + rcs_mask |= common_mask; + bcs_mask |= common_mask; + vcs_mask |= common_mask; + vecs_mask |= common_mask; + ccs_mask |= common_mask; if (xe_gt_is_main_type(gt)) { /* @@ -169,44 +172,62 @@ ... u32 bcs_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); /* Enable interrupts for each engine class */ - xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask); + xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, + REG_FIELD_PREP(ENGINE1_MASK, rcs_mask) | + REG_FIELD_PREP(ENGINE0_MASK, bcs_mask)); if (ccs_fuse_mask) - xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, smask); + xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, + REG_FIELD_PREP(ENGINE1_MASK, ccs_mask)); /* Unmask interrupts for each engine instance */ - xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~smask); - xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~smask); + val = ~REG_FIELD_PREP(ENGINE1_MASK, rcs_mask); + xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, val); + val = ~REG_FIELD_PREP(ENGINE1_MASK, bcs_mask); + xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, val); + + val = ~(REG_FIELD_PREP(ENGINE1_MASK, bcs_mask) | + REG_FIELD_PREP(ENGINE0_MASK, bcs_mask)); if (bcs_fuse_mask & (BIT(1)|BIT(2))) - xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, val); if (bcs_fuse_mask & (BIT(3)|BIT(4))) - xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, val); if (bcs_fuse_mask & (BIT(5)|BIT(6))) - xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask); +
xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, val); if (bcs_fuse_mask & (BIT(7)|BIT(8))) - xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, val); + + val = ~(REG_FIELD_PREP(ENGINE1_MASK, ccs_mask) | + REG_FIELD_PREP(ENGINE0_MASK, ccs_mask)); if (ccs_fuse_mask & (BIT(0)|BIT(1))) - xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, val); if (ccs_fuse_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, val); } if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) { u32 other_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER); /* Enable interrupts for each engine class */ - xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, dmask); + xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, + REG_FIELD_PREP(ENGINE1_MASK, vcs_mask) | + REG_FIELD_PREP(ENGINE0_MASK, vecs_mask)); /* Unmask interrupts for each engine instance */ - xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, ~dmask); - xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, ~dmask); - xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, ~dmask); + val = ~(REG_FIELD_PREP(ENGINE1_MASK, vcs_mask) | + REG_FIELD_PREP(ENGINE0_MASK, vcs_mask)); + xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, val); + xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, val); + + val = ~(REG_FIELD_PREP(ENGINE1_MASK, vecs_mask) | + REG_FIELD_PREP(ENGINE0_MASK, vecs_mask)); + xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, val); /* * the heci2 interrupt is enabled via the same register as the * GSCCS interrupts, but it has its own mask register. */ if (other_fuse_mask) { - gsc_mask = irqs | GSC_ER_COMPLETE; + gsc_mask = common_mask | GSC_ER_COMPLETE; heci_mask = GSC_IRQ_INTF(1); } else if (xe->info.has_heci_gscfi) { gsc_mask = GSC_IRQ_INTF(1); -- cgit v1.2.3 From 490fa7863b5bd232bb918433bff9b8c9fd5f3162 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 16 Oct 2025 19:26:38 -0700 Subject: drm/xe/irq: Rename bits used with all engines Two bit fields have similar functionality across the interrupt vectors but are named "RENDER". Rename them to follow the bspec more closely and clear any confusion when using them for other engines. 
Bspec: 62353, 62354, 62355, 62346, 62345, 63341 Suggested-by: Matt Roper Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251016-xe3p-v3-19-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_irq_regs.h | 4 ++-- drivers/gpu/drm/xe/xe_hw_engine.c | 2 +- drivers/gpu/drm/xe/xe_irq.c | 6 +++--- drivers/gpu/drm/xe/xe_memirq.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h index 7c2a3a140142..f6117720963b 100644 --- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h @@ -80,9 +80,9 @@ #define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) #define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) #define GSC_ER_COMPLETE REG_BIT(5) -#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) +#define GT_FLUSH_COMPLETE_INTERRUPT REG_BIT(4) #define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) -#define GT_RENDER_USER_INTERRUPT REG_BIT(0) +#define GT_MI_USER_INTERRUPT REG_BIT(0) /* irqs for OTHER_KCR_INSTANCE */ #define KCR_PXP_STATE_TERMINATED_INTERRUPT REG_BIT(1) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 073ecd263e54..6a9e2a4272dd 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -904,7 +904,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) if (hwe->irq_handler) hwe->irq_handler(hwe, intr_vec); - if (intr_vec & GT_RENDER_USER_INTERRUPT) + if (intr_vec & GT_MI_USER_INTERRUPT) xe_hw_fence_irq_run(hwe->fence_irq); } diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 142f422a5d97..2108c86ed478 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -147,10 +147,10 @@ void xe_irq_enable_hwe(struct xe_gt *gt) return; if (xe_device_uc_enabled(xe)) { - common_mask = GT_RENDER_USER_INTERRUPT | - GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; + common_mask = GT_MI_USER_INTERRUPT | + GT_FLUSH_COMPLETE_INTERRUPT; } else { - common_mask = GT_RENDER_USER_INTERRUPT | + common_mask = GT_MI_USER_INTERRUPT | GT_CS_MASTER_ERROR_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT | GT_WAIT_SEMAPHORE_INTERRUPT; diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index 2ef9d9aab264..b0c7ce0a5d1e 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -434,8 +434,8 @@ static void memirq_dispatch_engine(struct xe_memirq *memirq, struct iosys_map *s { memirq_debug(memirq, "STATUS %s %*ph\n", hwe->name, 16, status->vaddr); - if (memirq_received(memirq, status, ilog2(GT_RENDER_USER_INTERRUPT), hwe->name)) - xe_hw_engine_handle_irq(hwe, GT_RENDER_USER_INTERRUPT); + if (memirq_received(memirq, status, ilog2(GT_MI_USER_INTERRUPT), hwe->name)) + xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT); } static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *status, -- cgit v1.2.3 From 22b7117ec8c45923d2413ea54fb97fedd472ceaf Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 16 Oct 2025 19:26:39 -0700 Subject: drm/xe/irq: Check fuse mask for media engines Just like the other engines, check xe_hw_engine_mask_per_class() for VCS and VECS to account for architectural availability of those registers. With that, all the possibly available media engines can have their interrupts enabled. 
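For instance (hypothetical fuse value, purely illustrative): on a part where only VCS0 and VCS2 survive fusing, the decode mask reads as

	u32 vcs_fuse_mask = BIT(0) | BIT(2);	/* 0b0101: VCS0 and VCS2 present */

so in the hunk below only the VCS0_VCS1 and VCS2_VCS3 mask registers are written, while VCS4_VCS5 and VCS6_VCS7 - which may not even exist architecturally on that part - are left untouched.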
Bspec: 54030 Suggested-by: Matt Roper Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251016-xe3p-v3-20-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_irq_regs.h | 3 +++ drivers/gpu/drm/xe/xe_irq.c | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h index f6117720963b..815d5e3d2209 100644 --- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h @@ -65,7 +65,10 @@ #define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF) #define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF) #define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF) +#define VCS4_VCS5_INTR_MASK XE_REG(0x1900b0, XE_REG_OPTION_VF) +#define VCS6_VCS7_INTR_MASK XE_REG(0x1900b4, XE_REG_OPTION_VF) #define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF) +#define VECS2_VECS3_INTR_MASK XE_REG(0x1900d4, XE_REG_OPTION_VF) #define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4) #define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF) #define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 2108c86ed478..8f2c8d3ae5f8 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -205,6 +205,8 @@ void xe_irq_enable_hwe(struct xe_gt *gt) } if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) { + u32 vcs_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_DECODE); + u32 vecs_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE); u32 other_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER); /* Enable interrupts for each engine class */ @@ -215,12 +217,21 @@ void xe_irq_enable_hwe(struct xe_gt *gt) /* Unmask interrupts for each engine instance */ val = ~(REG_FIELD_PREP(ENGINE1_MASK, vcs_mask) | REG_FIELD_PREP(ENGINE0_MASK, vcs_mask)); - xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, val); - xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, val); + if (vcs_fuse_mask & (BIT(0) | BIT(1))) + xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, val); + if (vcs_fuse_mask & (BIT(2) | BIT(3))) + xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, val); + if (vcs_fuse_mask & (BIT(4) | BIT(5))) + xe_mmio_write32(mmio, VCS4_VCS5_INTR_MASK, val); + if (vcs_fuse_mask & (BIT(6) | BIT(7))) + xe_mmio_write32(mmio, VCS6_VCS7_INTR_MASK, val); val = ~(REG_FIELD_PREP(ENGINE1_MASK, vecs_mask) | REG_FIELD_PREP(ENGINE0_MASK, vecs_mask)); - xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, val); + if (vecs_fuse_mask & (BIT(0) | BIT(1))) + xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, val); + if (vecs_fuse_mask & (BIT(2) | BIT(3))) + xe_mmio_write32(mmio, VECS2_VECS3_INTR_MASK, val); /* * the heci2 interrupt is enabled via the same register as the -- cgit v1.2.3 From 32e0fa9e01475beba9eeb0a5fdda69762be11947 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 16 Oct 2025 19:26:40 -0700 Subject: drm/xe/xe3p_xpc: Add support for compute walker for non-MSIx The current compute walker implementation has a dependency on the GPU/SW stack: the UMD has to wait for an event from the KMD indicating that the PIPE_CONTROL interrupt completed, which adds latency to the software stack. This feature adds support for generating a completion interrupt directly from the GPGPU walker on platforms that do not support MSI-X, avoiding the PIPE_CONTROL drain/idle latency in software.
The only thing the kernel driver needs to do here is to wake up the thread waiting on the ufence, which is already handled by the irq handler. Before waiting on this event, the userspace side can opt in to this interrupt being generated by the HW by setting the flag in dw0[3] of the POST_SYNC_DATA_2 substructure of the COMPUTE_WALKER_2 instruction. Bspec: 62346, 74334 Suggested-by: Himal Prasad Ghimiray Signed-off-by: S A Muqthyar Ahmed Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251016-xe3p-v3-21-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_irq_regs.h | 1 + drivers/gpu/drm/xe/xe_irq.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h index 815d5e3d2209..2f97662d958d 100644 --- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h @@ -85,6 +85,7 @@ #define GSC_ER_COMPLETE REG_BIT(5) #define GT_FLUSH_COMPLETE_INTERRUPT REG_BIT(4) #define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) +#define GT_COMPUTE_WALKER_INTERRUPT REG_BIT(2) #define GT_MI_USER_INTERRUPT REG_BIT(0) /* irqs for OTHER_KCR_INSTANCE */ diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 8f2c8d3ae5f8..e5ed0242f7b1 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -149,6 +149,12 @@ void xe_irq_enable_hwe(struct xe_gt *gt) if (xe_device_uc_enabled(xe)) { common_mask = GT_MI_USER_INTERRUPT | GT_FLUSH_COMPLETE_INTERRUPT; + + /* Enable Compute Walker Interrupt for non-MSIX platforms */ + if (GRAPHICS_VERx100(xe) >= 3511 && !xe_device_has_msix(xe)) { + rcs_mask |= GT_COMPUTE_WALKER_INTERRUPT; + ccs_mask |= GT_COMPUTE_WALKER_INTERRUPT; + } } else { common_mask = GT_MI_USER_INTERRUPT | GT_CS_MASTER_ERROR_INTERRUPT | -- cgit v1.2.3 From bd03427c9785e6b090f3e222928836b0e725640d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 16 Oct 2025 19:26:41 -0700 Subject: drm/xe/xe3p_xpc: Skip compression tuning on platforms without flatccs The compression overfetch tuning settings only apply to platforms that support FlatCCS. In Xe3p_XPC (and any future IPs that also lack compression) some of the registers being adjusted by this tuning will not exist or may have been repurposed for something else, so we should take care not to try to program them. Note that our xe_rtp_match_has_flat_ccs() function will also return false on platforms that do have FlatCCS in the hardware design, but have compression manually disabled in the BIOS. On such platforms the registers still exist (and it would be fine to continue programming them), but they would have no effect, so skipping that tuning is also safe.
Signed-off-by: Matt Roper Reviewed-by: Shekhar Chauhan Link: https://lore.kernel.org/r/20251016-xe3p-v3-22-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_rtp.c | 7 +++++++ drivers/gpu/drm/xe/xe_rtp.h | 12 ++++++++++++ drivers/gpu/drm/xe/xe_tuning.c | 9 ++++++--- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 66707cc89ec9..ed509b1c8cfc 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -378,3 +378,10 @@ bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_device *xe, { return xe_gt_has_discontiguous_dss_groups(gt); } + +bool xe_rtp_match_has_flat_ccs(const struct xe_device *xe, + const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return xe->info.has_flat_ccs; +} diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index e5b8a9452e29..ba5f940c0a96 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -491,4 +491,16 @@ bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_device *xe, const struct xe_gt *gt, const struct xe_hw_engine *hwe); +/** + * xe_rtp_match_has_flat_ccs - Match when platform has FlatCCS compression + * @xe: Device structure + * @gt: GT structure + * @hwe: Engine instance + * + * Returns: true if platform has FlatCCS compression, false otherwise + */ +bool xe_rtp_match_has_flat_ccs(const struct xe_device *xe, + const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + #endif diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index fd58ea5e78bf..7c140d8cb1e0 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -40,7 +40,8 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, { XE_RTP_NAME("Tuning: Compression Overfetch"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), + FUNC(xe_rtp_match_has_flat_ccs)), XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), SET(CCCHKNREG1, L3CMPCTRL)) }, @@ -58,12 +59,14 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN)) }, { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), + FUNC(xe_rtp_match_has_flat_ccs)), XE_RTP_ACTIONS(SET(L3SQCREG2, COMPMEMRD256BOVRFETCHEN)) }, { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"), - XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED), + FUNC(xe_rtp_match_has_flat_ccs)), XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2, COMPMEMRD256BOVRFETCHEN)) }, -- cgit v1.2.3 From bf3035fe45f5b21ad32b883ad34efac5372f45a4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 16 Oct 2025 19:26:42 -0700 Subject: drm/xe/xe3p_xpc: Setup PAT table Xe3p_XPC IP requires a new PAT table; note that this table has one fewer column than the Xe2/Xe3 tables since compression is not supported. There's also no "WT" entry (which we wouldn't have used on a platform without display anyway). 
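A worked decoding of two of the entries below, assuming the Xe2 field encodings carry over (cache policy 0 = WB, 3 = UC; coh_mode 0 = non-coherent, 3 = 2-way coherent - an assumption based on the Xe2 table, not spelled out in this patch):

/*
 * [ 2] = XE3P_XPC_PAT( 0, 0, 0, 0, 3 ) -> cacheable (WB) in both caches, 2-way coherent
 * [ 3] = XE3P_XPC_PAT( 0, 0, 3, 3, 0 ) -> uncached in both caches, non-coherent
 *
 * which is why the table below maps XE_CACHE_WB to index 2 and
 * XE_CACHE_NONE (plus the unused WT slot) to index 3.
 */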
Bspec: 71582 Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://lore.kernel.org/r/20251016-xe3p-v3-23-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pat.c | 96 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 6e48ff84ad0a..7649b554942a 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -154,6 +154,41 @@ static const struct xe_pat_table_entry xe2_pat_table[] = { static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 0, 0, 3, 3 ); static const struct xe_pat_table_entry xe2_pat_pta = XE2_PAT( 0, 0, 0, 0, 3, 0 ); +/* + * Xe3p_XPC PAT table uses the same layout as Xe2/Xe3, except that there's no + * option for compression. Also note that the "L3" and "L4" register fields + * actually control L2 and L3 cache respectively on this platform. + */ +#define XE3P_XPC_PAT(no_promote, l3clos, l3_policy, l4_policy, __coh_mode) \ + XE2_PAT(no_promote, 0, l3clos, l3_policy, l4_policy, __coh_mode) + +static const struct xe_pat_table_entry xe3p_xpc_pat_ats = XE3P_XPC_PAT( 0, 0, 0, 0, 3 ); +static const struct xe_pat_table_entry xe3p_xpc_pat_pta = XE3P_XPC_PAT( 0, 0, 0, 0, 0 ); + +static const struct xe_pat_table_entry xe3p_xpc_pat_table[] = { + [ 0] = XE3P_XPC_PAT( 0, 0, 0, 0, 0 ), + [ 1] = XE3P_XPC_PAT( 0, 0, 0, 0, 2 ), + [ 2] = XE3P_XPC_PAT( 0, 0, 0, 0, 3 ), + [ 3] = XE3P_XPC_PAT( 0, 0, 3, 3, 0 ), + [ 4] = XE3P_XPC_PAT( 0, 0, 3, 3, 2 ), + [ 5] = XE3P_XPC_PAT( 0, 0, 3, 0, 0 ), + [ 6] = XE3P_XPC_PAT( 0, 0, 3, 0, 2 ), + [ 7] = XE3P_XPC_PAT( 0, 0, 3, 0, 3 ), + [ 8] = XE3P_XPC_PAT( 0, 0, 0, 3, 0 ), + [ 9] = XE3P_XPC_PAT( 0, 0, 0, 3, 2 ), + [10] = XE3P_XPC_PAT( 0, 0, 0, 3, 3 ), + /* 11..22 are reserved; leave set to all 0's */ + [23] = XE3P_XPC_PAT( 0, 1, 0, 0, 0 ), + [24] = XE3P_XPC_PAT( 0, 1, 0, 0, 2 ), + [25] = XE3P_XPC_PAT( 0, 1, 0, 0, 3 ), + [26] = XE3P_XPC_PAT( 0, 2, 0, 0, 0 ), + [27] = XE3P_XPC_PAT( 0, 2, 0, 0, 2 ), + [28] = XE3P_XPC_PAT( 0, 2, 0, 0, 3 ), + [29] = XE3P_XPC_PAT( 0, 3, 0, 0, 0 ), + [30] = XE3P_XPC_PAT( 0, 3, 0, 0, 2 ), + [31] = XE3P_XPC_PAT( 0, 3, 0, 0, 3 ), +}; + u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index) { WARN_ON(pat_index >= xe->pat.n_entries); @@ -380,9 +415,68 @@ static const struct xe_pat_ops xe2_pat_ops = { .dump = xe2_dump, }; +static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + unsigned int fw_ref; + u32 pat; + int i; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; + + drm_printf(p, "PAT table:\n"); + + for (i = 0; i < xe->pat.n_entries; i++) { + pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); + + drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u ] (%#8x)\n", i, + !!(pat & XE2_NO_PROMOTE), + REG_FIELD_GET(XE2_L3_CLOS, pat), + REG_FIELD_GET(XE2_L3_POLICY, pat), + REG_FIELD_GET(XE2_L4_POLICY, pat), + REG_FIELD_GET(XE2_COH_MODE, pat), + pat); + } + + /* + * Also print PTA_MODE, which describes how the hardware accesses + * PPGTT entries. 
+ */ + pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA)); + + drm_printf(p, "Page Table Access:\n"); + drm_printf(p, "PTA_MODE= [ %u, %u, %u, %u, %u ] (%#8x)\n", + !!(pat & XE2_NO_PROMOTE), + REG_FIELD_GET(XE2_L3_CLOS, pat), + REG_FIELD_GET(XE2_L3_POLICY, pat), + REG_FIELD_GET(XE2_L4_POLICY, pat), + REG_FIELD_GET(XE2_COH_MODE, pat), + pat); + + xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; +} + +static const struct xe_pat_ops xe3p_xpc_pat_ops = { + .program_graphics = program_pat_mcr, + .program_media = program_pat, + .dump = xe3p_xpc_dump, +}; + void xe_pat_init_early(struct xe_device *xe) { - if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) { + if (GRAPHICS_VERx100(xe) == 3511) { + xe->pat.ops = &xe3p_xpc_pat_ops; + xe->pat.table = xe3p_xpc_pat_table; + xe->pat.pat_ats = &xe3p_xpc_pat_ats; + xe->pat.pat_pta = &xe3p_xpc_pat_pta; + xe->pat.n_entries = ARRAY_SIZE(xe3p_xpc_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; + xe->pat.idx[XE_CACHE_WT] = 3; /* N/A (no display); use UC */ + xe->pat.idx[XE_CACHE_WB] = 2; + } else if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) { xe->pat.ops = &xe2_pat_ops; xe->pat.table = xe2_pat_table; xe->pat.pat_ats = &xe2_pat_ats; -- cgit v1.2.3 From d104d7ea864c1b9d8ffb3d93a58d49f9395be670 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Thu, 16 Oct 2025 19:26:43 -0700 Subject: drm/xe/xe3p: Add xe3p EU stall data format Starting with Xe3p, IP address in EU stall data increases to 61 bits. While at it, re-order the if-else ladder so the officially supported platforms come before PVC. Cc: Ashutosh Dixit Signed-off-by: Harish Chegondi Reviewed-by: Ashutosh Dixit Link: https://lore.kernel.org/r/20251016-xe3p-v3-24-3dd173a3097a@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_eu_stall.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index f5cfdf29fde3..650e45f6a7c7 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -124,6 +124,27 @@ struct xe_eu_stall_data_xe2 { __u64 unused[6]; } __packed; +/* + * EU stall data format for Xe3p arch GPUs. 
+ */ +struct xe_eu_stall_data_xe3p { + __u64 ip_addr:61; /* Bits 0 to 60 */ + __u64 tdr_count:8; /* Bits 61 to 68 */ + __u64 other_count:8; /* Bits 69 to 76 */ + __u64 control_count:8; /* Bits 77 to 84 */ + __u64 pipestall_count:8; /* Bits 85 to 92 */ + __u64 send_count:8; /* Bits 93 to 100 */ + __u64 dist_acc_count:8; /* Bits 101 to 108 */ + __u64 sbid_count:8; /* Bits 109 to 116 */ + __u64 sync_count:8; /* Bits 117 to 124 */ + __u64 inst_fetch_count:8; /* Bits 125 to 132 */ + __u64 active_count:8; /* Bits 133 to 140 */ + __u64 ex_id:3; /* Bits 141 to 143 */ + __u64 end_flag:1; /* Bit 144 */ + __u64 unused_bits:47; + __u64 unused[5]; +} __packed; + const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7}; /** @@ -167,10 +188,13 @@ size_t xe_eu_stall_data_record_size(struct xe_device *xe) { size_t record_size = 0; - if (xe->info.platform == XE_PVC) - record_size = sizeof(struct xe_eu_stall_data_pvc); + if (GRAPHICS_VER(xe) >= 35) + record_size = sizeof(struct xe_eu_stall_data_xe3p); else if (GRAPHICS_VER(xe) >= 20) record_size = sizeof(struct xe_eu_stall_data_xe2); + else if (xe->info.platform == XE_PVC) + record_size = sizeof(struct xe_eu_stall_data_pvc); + xe_assert(xe, is_power_of_2(record_size)); -- cgit v1.2.3 From 211ddde0823f1442e4ad052a2f30f050145ccada Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Oct 2025 15:19:16 -1000 Subject: Linux 6.18-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 17cfa11ca716..d14824792227 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* -- cgit v1.2.3