summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2025-10-17 08:16:58 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2025-10-17 08:16:58 -0700
commit: e96687c6d3b7814e6516dfa732946d3f40142819 (patch)
tree: 2560ba105ec783607b702ef4f8e5c1a14124792d /drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
parent: 389dfd9db6384026fef50afdbf91bcc41446e032 (diff)
parent: 62cab426d0e340cd38893227c279705cc9e8416a (diff)
Merge tag 'drm-fixes-2025-10-17' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie:
 "As per usual xe/amdgpu are the leaders, with some i915 and then a
  bunch of scattered fixes. There are a bunch of stability fixes for
  some older amdgpu cards.

  draw:
   - Avoid color truncation

  gpuvm:
   - Avoid kernel-doc warning

  sched:
   - Avoid double free

  i915:
   - Skip GuC communication warning if reset is in progress
   - Couple frontbuffer related fixes
   - Deactivate PSR only on LNL and when selective fetch enabled

  xe:
   - Increase global invalidation timeout to handle some workloads
   - Fix NPD while evicting BOs in an array of VM binds
   - Fix resizable BAR to account for possibly needing to move BARs
     other than the LMEMBAR
   - Fix error handling in xe_migrate_init()
   - Fix atomic fault handling with mixed mappings or if the page is
     already in VRAM
   - Enable media samplers power gating for platforms before Xe2
   - Fix de-registering exec queue from GuC when unbinding
   - Ensure data migration to system if indicated by madvise with SVM
   - Fix kerneldoc for kunit change
   - Always account for cacheline alignment on migration
   - Drop bogus assertion on eviction

  amdgpu:
   - Backlight fix
   - SI fixes
   - CIK fix
   - Make CE support debug only
   - IP discovery fix
   - Ring reset fixes
   - GPUVM fault memory barrier fix
   - Drop unused structures in amdgpu_drm.h
   - JPEG debugfs fix
   - VRAM handling fixes for GPUs without VRAM
   - GC 12 MES fixes

  amdkfd:
   - MES fix

  ast:
   - Fix display output after reboot

  bridge:
   - lt9211: Fix version check

  panthor:
   - Fix MCU suspend

  qaic:
   - Init bootlog in correct order
   - Treat remaining == 0 as error in find_and_map_user_pages()
   - Lock access to DBC request queue

  rockchip:
   - vop2: Fix destination size in atomic check"

* tag 'drm-fixes-2025-10-17' of https://gitlab.freedesktop.org/drm/kernel: (44 commits)
  drm/sched: Fix potential double free in drm_sched_job_add_resv_dependencies
  drm/xe/evict: drop bogus assert
  drm/xe/migrate: don't misalign current bytes
  drm/xe/kunit: Fix kerneldoc for parameterized tests
  drm/xe/svm: Ensure data will be migrated to system if indicated by madvise.
  drm/gpuvm: Fix kernel-doc warning for drm_gpuvm_map_req.map
  drm/i915/psr: Deactivate PSR only on LNL and when selective fetch enabled
  drm/ast: Blank with VGACR17 sync enable, always clear VGACRB6 sync off
  accel/qaic: Synchronize access to DBC request queue head & tail pointer
  accel/qaic: Treat remaining == 0 as error in find_and_map_user_pages()
  accel/qaic: Fix bootlog initialization ordering
  drm/rockchip: vop2: use correct destination rectangle height check
  drm/draw: fix color truncation in drm_draw_fill24
  drm/xe/guc: Check GuC running state before deregistering exec queue
  drm/xe: Enable media sampler power gating
  drm/xe: Handle mixed mappings and existing VRAM on atomic faults
  drm/xe/migrate: Fix an error path
  drm/xe: Move rebar to be done earlier
  drm/xe: Don't allow evicting of BOs in same VM in array of VM binds
  drm/xe: Increase global invalidation timeout to 1000us
  ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 20
1 files changed, 11 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 5bf9be073cdd..4883adcfbb4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -409,7 +409,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
return -EINVAL;
/* Clear the doorbell array before detection */
- memset(adev->mes.hung_queue_db_array_cpu_addr, 0,
+ memset(adev->mes.hung_queue_db_array_cpu_addr, AMDGPU_MES_INVALID_DB_OFFSET,
adev->mes.hung_queue_db_array_size * sizeof(u32));
input.queue_type = queue_type;
input.detect_only = detect_only;
@@ -420,12 +420,17 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
dev_err(adev->dev, "failed to detect and reset\n");
} else {
*hung_db_num = 0;
- for (i = 0; i < adev->mes.hung_queue_db_array_size; i++) {
+ for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) {
if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) {
hung_db_array[i] = db_array[i];
*hung_db_num += 1;
}
}
+
+ /*
+ * TODO: return HQD info for MES scheduled user compute queue reset cases
+ * stored in hung_db_array hqd info offset to full array size
+ */
}
return r;
@@ -686,14 +691,11 @@ out:
bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
{
uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
- bool is_supported = false;
-
- if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
- amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
- mes_rev >= 0x63)
- is_supported = true;
- return is_supported;
+ return ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
+ mes_rev >= 0x63) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0));
}
/* Fix me -- node_id is used to identify the correct MES instances in the future */