diff options
author | Christian König <christian.koenig@amd.com> | 2025-09-22 14:18:16 +0200 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2025-10-13 14:14:14 -0400 |
commit | 8f74c70be57527d7b79e2ecf6de1a154d148254d (patch) | |
tree | 814f477610e1aae6cd4b5afdb3177f8d7c84157e | |
parent | 5d55ed19d4190d2c210ac05ac7a53f800a8c6fe5 (diff) |
drm/amdgpu: block CE CS if not explicitely allowed by module option
The Constant Engine found on gfx6-gfx10 HW has been a notorious source of
problems.
RADV never used it in the first place, radeonsi only used it for a few
releases around 2017 for gfx6-gfx9 before dropping support for it as
well.
While investigating another problem I just recently found that submitting
to the CE seems to be completely broken on gfx9 for quite a while.
Since nobody complained about that problem it most likely means that
nobody is using any of the affected radeonsi versions on current Linux
kernels any more.
So to potentially phase out the support for the CE and eliminate another
source of problems block submitting CE IBs unless it is enabled again
using a debug flag.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Timur Kristóf <timur.kristof@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 |
3 files changed, 14 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2a0df4cabb99..6f5b4a0e0a34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1290,6 +1290,7 @@ struct amdgpu_device { bool debug_disable_gpu_ring_reset; bool debug_vm_userptr; bool debug_disable_ce_logs; + bool debug_enable_ce_cs; /* Protection for the following isolation structure */ struct mutex enforce_isolation_mutex; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9cd7741d2254..ba9fb08db094 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -364,6 +364,12 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, if (p->uf_bo && ring->funcs->no_user_fence) return -EINVAL; + if (!p->adev->debug_enable_ce_cs && + chunk_ib->flags & AMDGPU_IB_FLAG_CE) { + dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n"); + return -EINVAL; + } + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index bff25ef3e2d0..61268aa82df4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -144,7 +144,8 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), AMDGPU_DEBUG_SMU_POOL = BIT(7), AMDGPU_DEBUG_VM_USERPTR = BIT(8), - AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9) + AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9), + AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10) }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -2289,6 +2290,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: disable kernel logs of correctable errors\n"); adev->debug_disable_ce_logs = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) { + pr_info("debug: allowing command submission to CE engine\n"); + adev->debug_enable_ce_cs = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) |