From cc6b66d661fda4fb94c0099dd92b83f8de5c1bf4 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Thu, 21 Aug 2025 17:22:44 +0800 Subject: amdkfd: introduce new ioctl AMDKFD_IOC_CREATE_PROCESS This commit implemetns a new ioctl AMDKFD_IOC_CREATE_PROCESS that creates a new secondary kfd_progress on the FD. To keep backward compatibility, userspace programs need to invoke this ioctl explicitly on a FD to create a secondary kfd_process which replacing its primary kfd_process. This commit bumps ioctl minor version. Signed-off-by: Zhu Lingshan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 5d1727a6d040..84aa24c02715 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -44,9 +44,10 @@ * - 1.16 - Add contiguous VRAM allocation flag * - 1.17 - Add SDMA queue creation with target SDMA engine ID * - 1.18 - Rename pad in set_memory_policy_args to misc_process_flag + * - 1.19 - Add a new ioctl to craete secondary kfd processes */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 18 +#define KFD_IOCTL_MINOR_VERSION 19 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -1671,7 +1672,10 @@ struct kfd_ioctl_dbg_trap_args { #define AMDKFD_IOC_DBG_TRAP \ AMDKFD_IOWR(0x26, struct kfd_ioctl_dbg_trap_args) +#define AMDKFD_IOC_CREATE_PROCESS \ + AMDKFD_IO(0x27) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x27 +#define AMDKFD_COMMAND_END 0x28 #endif -- cgit v1.3 From 7a5fb05b5b18e531989aa55b10dfa4be0633207e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 5 Dec 2025 08:04:41 -0600 Subject: amdkfd: Bump ABI to indicate presence of Trap handler support for expert scheduling commit 0f0c8a6983db ("drm/amdkfd: Trap handler support for expert scheduling mode") introduced support for a trap handler when expert scheduling mode. However userspace needs to know whether or not a trap handler support is present. Bump the KFD IOCTL API so that userspace can key off this to decide. Suggested-by: Stella Laurenzo Fixes: 423888879412 ("drm/amdkfd: Trap handler support for expert scheduling mode") Reviewed-by: Kent Russell Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 84aa24c02715..4d0c1a53f9d5 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -45,9 +45,10 @@ * - 1.17 - Add SDMA queue creation with target SDMA engine ID * - 1.18 - Rename pad in set_memory_policy_args to misc_process_flag * - 1.19 - Add a new ioctl to craete secondary kfd processes + * - 1.20 - Trap handler support for expert scheduling mode available */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 19 +#define KFD_IOCTL_MINOR_VERSION 20 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ -- cgit v1.3 From e83f63da2ac776fbc30861e4ce8b798df6ee8a7a Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Mon, 23 Jun 2025 14:12:58 -0400 Subject: drm/amdkfd: allow debug subscription to lds violations on gfx 1250 GFX 1250 allows the debugger to subcribe to LDS out-of-range read/write memory violations. Bump IOCTL minor version and flag KFD capabilities for enablement hint. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 7 +++++++ drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 4 ++++ include/uapi/linux/kfd_ioctl.h | 4 +++- include/uapi/linux/kfd_sysfs.h | 3 ++- 4 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index ba9a09b6589a..f83e1238c1b3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -519,6 +519,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags) struct kfd_topology_device *topo_dev = kfd_topology_device_by_id(target->pdds[i]->dev->id); uint32_t caps = topo_dev->node_props.capability; + uint32_t caps2 = topo_dev->node_props.capability2; if (!(caps & HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED) && (*flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) { @@ -531,6 +532,12 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags) *flags = prev_flags; return -EACCES; } + + if (!(caps2 & HSA_CAP2_TRAP_DEBUG_LDS_OUT_OF_ADDR_RANGE_SUPPORTED) && + (*flags & KFD_DBG_TRAP_FLAG_LDS_OUT_OF_ADDR_RANGE)) { + *flags = prev_flags; + return -EACCES; + } } target->dbg_flags = *flags; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index a0990dd2378c..7a402c9c1b6e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -2028,6 +2028,10 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0)) dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED; + + if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 1, 0)) + dev->node_props.capability2 |= + HSA_CAP2_TRAP_DEBUG_LDS_OUT_OF_ADDR_RANGE_SUPPORTED; } kfd_topology_set_dbg_firmware_support(dev); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 4d0c1a53f9d5..6e91875c10ba 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -46,9 +46,10 @@ * - 1.18 - Rename pad in set_memory_policy_args to misc_process_flag * - 1.19 - Add a new ioctl to craete secondary kfd processes * - 1.20 - Trap handler support for expert scheduling mode available + * - 1.21 - Debugger support to subscribe to LDS out-of-address exceptions */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 20 +#define KFD_IOCTL_MINOR_VERSION 21 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -947,6 +948,7 @@ enum kfd_dbg_trap_address_watch_mode { enum kfd_dbg_trap_flags { KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP = 1, KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP = 2, + KFD_DBG_TRAP_FLAG_LDS_OUT_OF_ADDR_RANGE = 4 }; /* Trap exceptions */ diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h index 1125fe47959f..0b6ce2f3c887 100644 --- a/include/uapi/linux/kfd_sysfs.h +++ b/include/uapi/linux/kfd_sysfs.h @@ -64,7 +64,8 @@ #define HSA_CAP_RESERVED 0x000f8000 #define HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED 0x00000001 -#define HSA_CAP2_RESERVED 0xfffffffe +#define HSA_CAP2_TRAP_DEBUG_LDS_OUT_OF_ADDR_RANGE_SUPPORTED 0x00000002 +#define HSA_CAP2_RESERVED 0xfffffffc /* debug_prop bits in node properties */ #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK 0x0000000f -- cgit v1.3 From c51bb53d5c68041dd02f66d9b638cda33647623e Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Tue, 18 Mar 2025 19:49:55 +0000 Subject: drm/amdkfd: Add metadata ring buffer for compute Add support for separate ring-buffer for metadata packets when using compute queues. Userspace application allocate the metadata ring-buffer and the queue ring-buffer with a single allocation. The metadata ring-buffer starts after the queue ring-buffer. Signed-off-by: David Yat Sin Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 8 ++++++++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c | 21 +++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 7 +++++-- include/uapi/linux/kfd_ioctl.h | 5 +++-- 5 files changed, 39 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 041237861107..88621cb7d409 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -221,6 +221,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE"); } + if ((args->metadata_ring_size != 0) && !is_power_of_2(args->metadata_ring_size)) { + pr_err("Metadata ring size must be a power of 2 or 0\n"); + return -EINVAL; + } + if (!access_ok((const void __user *) args->read_pointer_address, sizeof(uint32_t))) { pr_err("Can't access read pointer\n"); @@ -255,6 +260,9 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->priority = args->queue_priority; q_properties->queue_address = args->ring_base_address; q_properties->queue_size = args->ring_size; + if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) + q_properties->metadata_queue_size = args->metadata_ring_size; + q_properties->read_ptr = (void __user *)args->read_pointer_address; q_properties->write_ptr = (void __user *)args->write_pointer_address; q_properties->eop_ring_buffer_address = args->eop_buffer_address; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c index f1c2c9e8cf6b..a06b4e89af8a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c @@ -266,6 +266,27 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + if (q->metadata_queue_size) { + /* On GC 12.1 is 64 DWs which is 4 times size of AQL packet */ + if (q->metadata_queue_size == q->queue_size * 4) { + /* + * User application allocates main queue ring and metadata queue ring + * with a single allocation. metadata queue ring starts after main + * queue ring. + */ + m->cp_hqd_kd_base = + lower_32_bits((q->queue_address + q->queue_size) >> 8); + m->cp_hqd_kd_base_hi = + upper_32_bits((q->queue_address + q->queue_size) >> 8); + + m->cp_hqd_kd_cntl |= CP_HQD_KD_CNTL__KD_FETCHER_ENABLE_MASK; + /* KD_SIZE = 2 for metadata packet = 64 DWs */ + m->cp_hqd_kd_cntl |= 2 << CP_HQD_KD_CNTL__KD_SIZE__SHIFT; + } else { + pr_warn("Invalid metadata ring size, metadata queue will be ignored\n"); + } + } + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ebc637c38c04..d798baa7e52e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -506,7 +506,8 @@ struct queue_properties { enum kfd_queue_format format; unsigned int queue_id; uint64_t queue_address; - uint64_t queue_size; + uint64_t queue_size; + uint64_t metadata_queue_size; uint32_t priority; uint32_t queue_percent; void __user *read_ptr; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 56c97189e7f1..1b465fdb2c64 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -247,9 +247,12 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope properties->format == KFD_QUEUE_FORMAT_AQL && topo_dev->node_props.gfx_target_version >= 70000 && topo_dev->node_props.gfx_target_version < 90000) - expected_queue_size = properties->queue_size / 2; + /* metadata_queue_size not supported on GFX7/GFX8 */ + expected_queue_size = + properties->queue_size / 2; else - expected_queue_size = properties->queue_size; + expected_queue_size = + properties->queue_size + properties->metadata_queue_size; vm = drm_priv_to_vm(pdd->drm_priv); err = amdgpu_bo_reserve(vm->root.bo, false); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 6e91875c10ba..047bcb1cc078 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -47,9 +47,10 @@ * - 1.19 - Add a new ioctl to craete secondary kfd processes * - 1.20 - Trap handler support for expert scheduling mode available * - 1.21 - Debugger support to subscribe to LDS out-of-address exceptions + * - 1.22 - Add queue creation with metadata ring base address */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 21 +#define KFD_IOCTL_MINOR_VERSION 22 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -87,7 +88,7 @@ struct kfd_ioctl_create_queue_args { __u32 ctx_save_restore_size; /* to KFD */ __u32 ctl_stack_size; /* to KFD */ __u32 sdma_engine_id; /* to KFD */ - __u32 pad; + __u32 metadata_ring_size; /* to KFD */ }; struct kfd_ioctl_destroy_queue_args { -- cgit v1.3 From e698127eb7249d6c70fa8f2bdba469c0e54f2e2b Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 23 Jul 2025 10:07:28 -0400 Subject: drm/amdkfd: add extended capabilities to device snapshot Add additional capabilities reporting. Signed-off-by: Jonathan Kim Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 1 + include/uapi/linux/kfd_ioctl.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 27176b2dc714..8f8a0975f1a7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -1108,6 +1108,7 @@ int kfd_dbg_trap_device_snapshot(struct kfd_process *target, device_info.num_xcc = NUM_XCC(pdd->dev->xcc_mask); device_info.capability = topo_dev->node_props.capability; device_info.debug_prop = topo_dev->node_props.debug_prop; + device_info.capability2 = topo_dev->node_props.capability2; if (exception_clear_mask) pdd->exception_status &= ~exception_clear_mask; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 047bcb1cc078..e72359370857 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -149,6 +149,8 @@ struct kfd_dbg_device_info_entry { __u32 num_xcc; __u32 capability; __u32 debug_prop; + __u32 capability2; + __u32 pad; }; /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ -- cgit v1.3