diff options
Diffstat (limited to 'drivers/accel/amdxdna')
25 files changed, 1483 insertions, 405 deletions
diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile index 6797dac65efa..6344aaf523fa 100644 --- a/drivers/accel/amdxdna/Makefile +++ b/drivers/accel/amdxdna/Makefile @@ -14,6 +14,7 @@ amdxdna-y := \ amdxdna_mailbox.o \ amdxdna_mailbox_helper.o \ amdxdna_pci_drv.o \ + amdxdna_pm.o \ amdxdna_sysfs.o \ amdxdna_ubuf.o \ npu1_regs.o \ diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO index ad8ac6e315b6..0e4bbebeaedf 100644 --- a/drivers/accel/amdxdna/TODO +++ b/drivers/accel/amdxdna/TODO @@ -1,2 +1 @@ - Add debugfs support -- Add debug BO support diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index e9f9b1fa5dc1..42d876a427c5 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -21,6 +21,7 @@ #include "amdxdna_gem.h" #include "amdxdna_mailbox.h" #include "amdxdna_pci_drv.h" +#include "amdxdna_pm.h" static bool force_cmdlist; module_param(force_cmdlist, bool, 0600); @@ -88,7 +89,7 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw goto out; } - ret = aie2_config_cu(hwctx); + ret = aie2_config_cu(hwctx, NULL); if (ret) { XDNA_ERR(xdna, "Config cu failed, ret %d", ret); goto out; @@ -167,14 +168,11 @@ static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg) int aie2_hwctx_resume(struct amdxdna_client *client) { - struct amdxdna_dev *xdna = client->xdna; - /* * The resume path cannot guarantee that mailbox channel can be * regenerated. If this happen, when submit message to this * mailbox channel, error will return. */ - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb); } @@ -184,12 +182,13 @@ aie2_sched_notify(struct amdxdna_sched_job *job) struct dma_fence *fence = job->fence; trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq); + + amdxdna_pm_suspend_put(job->hwctx->client->xdna); job->hwctx->priv->completed++; dma_fence_signal(fence); up(&job->hwctx->priv->job_sem); job->job_done = true; - dma_fence_put(fence); mmput_async(job->mm); aie2_job_put(job); } @@ -204,10 +203,13 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size) cmd_abo = job->cmd_bo; - if (unlikely(!data)) + if (unlikely(job->job_timeout)) { + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT); + ret = -EINVAL; goto out; + } - if (unlikely(size != sizeof(u32))) { + if (unlikely(!data) || unlikely(size != sizeof(u32))) { amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); ret = -EINVAL; goto out; @@ -226,11 +228,10 @@ out: } static int -aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size) +aie2_sched_drvcmd_resp_handler(void *handle, void __iomem *data, size_t size) { struct amdxdna_sched_job *job = handle; int ret = 0; - u32 status; if (unlikely(!data)) goto out; @@ -240,8 +241,7 @@ aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size) goto out; } - status = readl(data); - XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); + job->drv_cmd->result = readl(data); out: aie2_sched_notify(job); @@ -260,6 +260,13 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size) int ret = 0; cmd_abo = job->cmd_bo; + + if (unlikely(job->job_timeout)) { + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT); + ret = -EINVAL; + goto out; + } + if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) { amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); ret = -EINVAL; @@ -314,8 +321,18 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) kref_get(&job->refcnt); fence = dma_fence_get(job->fence); - if (unlikely(!cmd_abo)) { - ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler); + if (job->drv_cmd) { + switch (job->drv_cmd->opcode) { + case SYNC_DEBUG_BO: + ret = aie2_sync_bo(hwctx, job, aie2_sched_drvcmd_resp_handler); + break; + case ATTACH_DEBUG_BO: + ret = aie2_config_debug_bo(hwctx, job, aie2_sched_drvcmd_resp_handler); + break; + default: + ret = -EINVAL; + break; + } goto out; } @@ -362,6 +379,7 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job) xdna = hwctx->client->xdna; trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq); + job->job_timeout = true; mutex_lock(&xdna->dev_lock); aie2_hwctx_stop(xdna, hwctx, sched_job); @@ -531,13 +549,12 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) .num_rqs = DRM_SCHED_PRIORITY_COUNT, .credit_limit = HWCTX_MAX_CMDS, .timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT), - .name = hwctx->name, + .name = "amdxdna_js", .dev = xdna->ddev.dev, }; struct drm_gpu_scheduler *sched; struct amdxdna_hwctx_priv *priv; struct amdxdna_gem_obj *heap; - struct amdxdna_dev_hdl *ndev; int i, ret; priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL); @@ -610,10 +627,14 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) goto free_entity; } + ret = amdxdna_pm_resume_get(xdna); + if (ret) + goto free_col_list; + ret = aie2_alloc_resource(hwctx); if (ret) { XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret); - goto free_col_list; + goto suspend_put; } ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, @@ -628,10 +649,9 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret); goto release_resource; } + amdxdna_pm_suspend_put(xdna); hwctx->status = HWCTX_STAT_INIT; - ndev = xdna->dev_handle; - ndev->hwctx_num++; init_waitqueue_head(&priv->job_free_wq); XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); @@ -640,6 +660,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) release_resource: aie2_release_resource(hwctx); +suspend_put: + amdxdna_pm_suspend_put(xdna); free_col_list: kfree(hwctx->col_list); free_entity: @@ -662,26 +684,25 @@ free_priv: void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) { - struct amdxdna_dev_hdl *ndev; struct amdxdna_dev *xdna; int idx; xdna = hwctx->client->xdna; - ndev = xdna->dev_handle; - ndev->hwctx_num--; XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq); - drm_sched_entity_destroy(&hwctx->priv->entity); - aie2_hwctx_wait_for_idle(hwctx); /* Request fw to destroy hwctx and cancel the rest pending requests */ aie2_release_resource(hwctx); + mutex_unlock(&xdna->dev_lock); + drm_sched_entity_destroy(&hwctx->priv->entity); + /* Wait for all submitted jobs to be completed or canceled */ wait_event(hwctx->priv->job_free_wq, atomic64_read(&hwctx->job_submit_cnt) == atomic64_read(&hwctx->job_free_cnt)); + mutex_lock(&xdna->dev_lock); drm_sched_fini(&hwctx->priv->sched); aie2_ctx_syncobj_destroy(hwctx); @@ -697,6 +718,14 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) kfree(hwctx->cus); } +static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size) +{ + struct amdxdna_hwctx *hwctx = handle; + + amdxdna_pm_suspend_put(hwctx->client->xdna); + return 0; +} + static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size) { struct amdxdna_hwctx_param_config_cu *config = buf; @@ -728,10 +757,14 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size if (!hwctx->cus) return -ENOMEM; - ret = aie2_config_cu(hwctx); + ret = amdxdna_pm_resume_get(xdna); + if (ret) + goto free_cus; + + ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler); if (ret) { XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret); - goto free_cus; + goto pm_suspend_put; } wmb(); /* To avoid locking in command submit when check status */ @@ -739,12 +772,82 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size return 0; +pm_suspend_put: + amdxdna_pm_suspend_put(xdna); free_cus: kfree(hwctx->cus); hwctx->cus = NULL; return ret; } +static void aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq) +{ + struct dma_fence *out_fence = aie2_cmd_get_out_fence(hwctx, seq); + + if (!out_fence) { + XDNA_ERR(hwctx->client->xdna, "Failed to get fence"); + return; + } + + dma_fence_wait_timeout(out_fence, false, MAX_SCHEDULE_TIMEOUT); + dma_fence_put(out_fence); +} + +static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl, + bool attach) +{ + struct amdxdna_client *client = hwctx->client; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_drv_cmd cmd = { 0 }; + struct amdxdna_gem_obj *abo; + u64 seq; + int ret; + + abo = amdxdna_gem_get_obj(client, bo_hdl, AMDXDNA_BO_DEV); + if (!abo) { + XDNA_ERR(xdna, "Get bo %d failed", bo_hdl); + return -EINVAL; + } + + if (attach) { + if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) { + ret = -EBUSY; + goto put_obj; + } + cmd.opcode = ATTACH_DEBUG_BO; + } else { + if (abo->assigned_hwctx != hwctx->id) { + ret = -EINVAL; + goto put_obj; + } + cmd.opcode = DETACH_DEBUG_BO; + } + + ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE, + &bo_hdl, 1, hwctx->id, &seq); + if (ret) { + XDNA_ERR(xdna, "Submit command failed"); + goto put_obj; + } + + aie2_cmd_wait(hwctx, seq); + if (cmd.result) { + XDNA_ERR(xdna, "Response failure 0x%x", cmd.result); + goto put_obj; + } + + if (attach) + abo->assigned_hwctx = hwctx->id; + else + abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE; + + XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name); + +put_obj: + amdxdna_gem_put_obj(abo); + return ret; +} + int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size) { struct amdxdna_dev *xdna = hwctx->client->xdna; @@ -754,14 +857,40 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu case DRM_AMDXDNA_HWCTX_CONFIG_CU: return aie2_hwctx_cu_config(hwctx, buf, size); case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF: + return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, true); case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF: - return -EOPNOTSUPP; + return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, false); default: XDNA_DBG(xdna, "Not supported type %d", type); return -EOPNOTSUPP; } } +int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl) +{ + struct amdxdna_client *client = hwctx->client; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_drv_cmd cmd = { 0 }; + u64 seq; + int ret; + + cmd.opcode = SYNC_DEBUG_BO; + ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE, + &debug_bo_hdl, 1, hwctx->id, &seq); + if (ret) { + XDNA_ERR(xdna, "Submit command failed"); + return ret; + } + + aie2_cmd_wait(hwctx, seq); + if (cmd.result) { + XDNA_ERR(xdna, "Response failure 0x%x", cmd.result); + return -EINVAL; + } + + return 0; +} + static int aie2_populate_range(struct amdxdna_gem_obj *abo) { struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); @@ -862,11 +991,15 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, goto free_chain; } + ret = amdxdna_pm_resume_get(xdna); + if (ret) + goto cleanup_job; + retry: ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx); if (ret) { XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret); - goto cleanup_job; + goto suspend_put; } for (i = 0; i < job->bo_cnt; i++) { @@ -874,7 +1007,7 @@ retry: if (ret) { XDNA_WARN(xdna, "Failed to reserve fences %d", ret); drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); - goto cleanup_job; + goto suspend_put; } } @@ -889,12 +1022,12 @@ retry: msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); } else if (time_after(jiffies, timeout)) { ret = -ETIME; - goto cleanup_job; + goto suspend_put; } ret = aie2_populate_range(abo); if (ret) - goto cleanup_job; + goto suspend_put; goto retry; } } @@ -920,6 +1053,8 @@ retry: return 0; +suspend_put: + amdxdna_pm_suspend_put(xdna); cleanup_job: drm_sched_job_cleanup(&job->base); free_chain: diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c index 5ee905632a39..d452008ec4f4 100644 --- a/drivers/accel/amdxdna/aie2_error.c +++ b/drivers/accel/amdxdna/aie2_error.c @@ -13,6 +13,7 @@ #include "aie2_msg_priv.h" #include "aie2_pci.h" +#include "amdxdna_error.h" #include "amdxdna_mailbox.h" #include "amdxdna_pci_drv.h" @@ -46,6 +47,7 @@ enum aie_module_type { AIE_MEM_MOD = 0, AIE_CORE_MOD, AIE_PL_MOD, + AIE_UNKNOWN_MOD, }; enum aie_error_category { @@ -143,6 +145,31 @@ static const struct aie_event_category aie_ml_shim_tile_event_cat[] = { EVENT_CATEGORY(74U, AIE_ERROR_LOCK), }; +static const enum amdxdna_error_num aie_cat_err_num_map[] = { + [AIE_ERROR_SATURATION] = AMDXDNA_ERROR_NUM_AIE_SATURATION, + [AIE_ERROR_FP] = AMDXDNA_ERROR_NUM_AIE_FP, + [AIE_ERROR_STREAM] = AMDXDNA_ERROR_NUM_AIE_STREAM, + [AIE_ERROR_ACCESS] = AMDXDNA_ERROR_NUM_AIE_ACCESS, + [AIE_ERROR_BUS] = AMDXDNA_ERROR_NUM_AIE_BUS, + [AIE_ERROR_INSTRUCTION] = AMDXDNA_ERROR_NUM_AIE_INSTRUCTION, + [AIE_ERROR_ECC] = AMDXDNA_ERROR_NUM_AIE_ECC, + [AIE_ERROR_LOCK] = AMDXDNA_ERROR_NUM_AIE_LOCK, + [AIE_ERROR_DMA] = AMDXDNA_ERROR_NUM_AIE_DMA, + [AIE_ERROR_MEM_PARITY] = AMDXDNA_ERROR_NUM_AIE_MEM_PARITY, + [AIE_ERROR_UNKNOWN] = AMDXDNA_ERROR_NUM_UNKNOWN, +}; + +static_assert(ARRAY_SIZE(aie_cat_err_num_map) == AIE_ERROR_UNKNOWN + 1); + +static const enum amdxdna_error_module aie_err_mod_map[] = { + [AIE_MEM_MOD] = AMDXDNA_ERROR_MODULE_AIE_MEMORY, + [AIE_CORE_MOD] = AMDXDNA_ERROR_MODULE_AIE_CORE, + [AIE_PL_MOD] = AMDXDNA_ERROR_MODULE_AIE_PL, + [AIE_UNKNOWN_MOD] = AMDXDNA_ERROR_MODULE_UNKNOWN, +}; + +static_assert(ARRAY_SIZE(aie_err_mod_map) == AIE_UNKNOWN_MOD + 1); + static enum aie_error_category aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type) { @@ -176,12 +203,40 @@ aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type) if (event_id != lut[i].event_id) continue; + if (lut[i].category > AIE_ERROR_UNKNOWN) + return AIE_ERROR_UNKNOWN; + return lut[i].category; } return AIE_ERROR_UNKNOWN; } +static void aie2_update_last_async_error(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err) +{ + struct aie_error *errs = err_info; + enum amdxdna_error_module err_mod; + enum aie_error_category aie_err; + enum amdxdna_error_num err_num; + struct aie_error *last_err; + + last_err = &errs[num_err - 1]; + if (last_err->mod_type >= AIE_UNKNOWN_MOD) { + err_num = aie_cat_err_num_map[AIE_ERROR_UNKNOWN]; + err_mod = aie_err_mod_map[AIE_UNKNOWN_MOD]; + } else { + aie_err = aie_get_error_category(last_err->row, + last_err->event_id, + last_err->mod_type); + err_num = aie_cat_err_num_map[aie_err]; + err_mod = aie_err_mod_map[last_err->mod_type]; + } + + ndev->last_async_err.err_code = AMDXDNA_ERROR_ENCODE(err_num, err_mod); + ndev->last_async_err.ts_us = ktime_to_us(ktime_get_real()); + ndev->last_async_err.ex_err_code = AMDXDNA_EXTRA_ERR_ENCODE(last_err->row, last_err->col); +} + static u32 aie2_error_backtrack(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err) { struct aie_error *errs = err_info; @@ -264,29 +319,14 @@ static void aie2_error_worker(struct work_struct *err_work) } mutex_lock(&xdna->dev_lock); + aie2_update_last_async_error(e->ndev, info->payload, info->err_cnt); + /* Re-sent this event to firmware */ if (aie2_error_event_send(e)) XDNA_WARN(xdna, "Unable to register async event"); mutex_unlock(&xdna->dev_lock); } -int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev) -{ - struct amdxdna_dev *xdna = ndev->xdna; - struct async_event *e; - int i, ret; - - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); - for (i = 0; i < ndev->async_events->event_cnt; i++) { - e = &ndev->async_events->event[i]; - ret = aie2_error_event_send(e); - if (ret) - return ret; - } - - return 0; -} - void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev) { struct amdxdna_dev *xdna = ndev->xdna; @@ -341,6 +381,10 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev) e->size = ASYNC_BUF_SIZE; e->resp.status = MAX_AIE2_STATUS_CODE; INIT_WORK(&e->work, aie2_error_worker); + + ret = aie2_error_event_send(e); + if (ret) + goto free_wq; } ndev->async_events = events; @@ -349,6 +393,8 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev) events->event_cnt, events->size); return 0; +free_wq: + destroy_workqueue(events->wq); free_buf: dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf, events->addr, DMA_FROM_DEVICE); @@ -356,3 +402,18 @@ free_events: kfree(events); return ret; } + +int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, struct amdxdna_drm_get_array *args) +{ + struct amdxdna_dev *xdna = ndev->xdna; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + args->num_element = 1; + args->element_size = sizeof(ndev->last_async_err); + if (copy_to_user(u64_to_user_ptr(args->buffer), + &ndev->last_async_err, args->element_size)) + return -EFAULT; + + return 0; +} diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 9caad083543d..d493bb1c3360 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -27,6 +27,8 @@ #define DECLARE_AIE2_MSG(name, op) \ DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE) +#define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops) + static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, struct xdna_mailbox_msg *msg) { @@ -37,7 +39,7 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, if (!ndev->mgmt_chann) return -ENODEV; - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna->dev_lock)); ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg); if (ret == -ETIME) { xdna_mailbox_stop_channel(ndev->mgmt_chann); @@ -45,7 +47,7 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, ndev->mgmt_chann = NULL; } - if (!ret && *hdl->data != AIE2_STATUS_SUCCESS) { + if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) { XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x", msg->opcode, *hdl->data); ret = -EINVAL; @@ -208,6 +210,14 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct hwctx->fw_ctx_id = resp.context_id; WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id"); + if (ndev->force_preempt_enabled) { + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id); + if (ret) { + XDNA_ERR(xdna, "failed to enable force preempt %d", ret); + return ret; + } + } + cq_pair = &resp.cq_pair[0]; x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr); x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr); @@ -233,6 +243,7 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct ret = -EINVAL; goto out_destroy_context; } + ndev->hwctx_num++; XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d", hwctx->name, ret, resp.msix_id); @@ -267,6 +278,7 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc hwctx->fw_ctx_id); hwctx->priv->mbox_chann = NULL; hwctx->fw_ctx_id = -1; + ndev->hwctx_num--; return ret; } @@ -332,11 +344,6 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, goto fail; } - if (resp.status != AIE2_STATUS_SUCCESS) { - XDNA_ERR(xdna, "Query NPU status failed, status 0x%x", resp.status); - ret = -EINVAL; - goto fail; - } XDNA_DBG(xdna, "Query NPU status completed"); if (size < resp.size) { @@ -358,6 +365,55 @@ fail: return ret; } +int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev, + char __user *buf, u32 size, + struct amdxdna_drm_query_telemetry_header *header) +{ + DECLARE_AIE2_MSG(get_telemetry, MSG_OP_GET_TELEMETRY); + struct amdxdna_dev *xdna = ndev->xdna; + dma_addr_t dma_addr; + u8 *addr; + int ret; + + if (header->type >= MAX_TELEMETRY_TYPE) + return -EINVAL; + + addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr, + DMA_FROM_DEVICE, GFP_KERNEL); + if (!addr) + return -ENOMEM; + + req.buf_addr = dma_addr; + req.buf_size = size; + req.type = header->type; + + drm_clflush_virt_range(addr, size); /* device can access */ + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) { + XDNA_ERR(xdna, "Query telemetry failed, status %d", ret); + goto free_buf; + } + + if (size < resp.size) { + ret = -EINVAL; + XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size); + goto free_buf; + } + + if (copy_to_user(buf, addr, resp.size)) { + ret = -EFAULT; + XDNA_ERR(xdna, "Failed to copy telemetry to user space"); + goto free_buf; + } + + header->major = resp.major; + header->minor = resp.minor; + +free_buf: + dma_free_noncoherent(xdna->ddev.dev, size, addr, dma_addr, DMA_FROM_DEVICE); + return ret; +} + int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size, void *handle, int (*cb)(void*, void __iomem *, size_t)) { @@ -377,15 +433,17 @@ int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT); } -int aie2_config_cu(struct amdxdna_hwctx *hwctx) +int aie2_config_cu(struct amdxdna_hwctx *hwctx, + int (*notify_cb)(void *, void __iomem *, size_t)) { struct mailbox_channel *chann = hwctx->priv->mbox_chann; struct amdxdna_dev *xdna = hwctx->client->xdna; u32 shift = xdna->dev_info->dev_mem_buf_shift; - DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU); + struct config_cu_req req = { 0 }; + struct xdna_mailbox_msg msg; struct drm_gem_object *gobj; struct amdxdna_gem_obj *abo; - int ret, i; + int i; if (!chann) return -ENODEV; @@ -423,191 +481,386 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx) } req.num_cus = hwctx->cus->num_cus; - ret = xdna_send_msg_wait(xdna, chann, &msg); - if (ret == -ETIME) - aie2_destroy_context(xdna->dev_handle, hwctx); + msg.send_data = (u8 *)&req; + msg.send_size = sizeof(req); + msg.handle = hwctx; + msg.opcode = MSG_OP_CONFIG_CU; + msg.notify_cb = notify_cb; + return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); +} - if (resp.status == AIE2_STATUS_SUCCESS) { - XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret); - return 0; - } +static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req, + size_t *size, u32 *msg_op) +{ + struct execute_buffer_req *cu_req = req; + u32 cmd_len; + void *cmd; - XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d", - msg.opcode, resp.status, ret); - return ret; + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + if (cmd_len > sizeof(cu_req->payload)) + return -EINVAL; + + cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (cu_req->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + memcpy(cu_req->payload, cmd, cmd_len); + + *size = sizeof(*cu_req); + *msg_op = MSG_OP_EXECUTE_BUFFER_CF; + return 0; } -int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, - int (*notify_cb)(void *, void __iomem *, size_t)) +static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req, + size_t *size, u32 *msg_op) { - struct mailbox_channel *chann = hwctx->priv->mbox_chann; - struct amdxdna_dev *xdna = hwctx->client->xdna; - struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; - union { - struct execute_buffer_req ebuf; - struct exec_dpu_req dpu; - } req; - struct xdna_mailbox_msg msg; - u32 payload_len; - void *payload; - int cu_idx; - int ret; - u32 op; + struct exec_dpu_req *dpu_req = req; + struct amdxdna_cmd_start_npu *sn; + u32 cmd_len; - if (!chann) - return -ENODEV; + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload)) + return -EINVAL; - payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len); - if (!payload) { - XDNA_ERR(xdna, "Invalid command, cannot get payload"); + dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (dpu_req->cu_idx == INVALID_CU_IDX) return -EINVAL; - } - cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo); - if (cu_idx < 0) { - XDNA_DBG(xdna, "Invalid cu idx"); + dpu_req->inst_buf_addr = sn->buffer; + dpu_req->inst_size = sn->buffer_size; + dpu_req->inst_prop_cnt = sn->prop_count; + memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn)); + + *size = sizeof(*dpu_req); + *msg_op = MSG_OP_EXEC_DPU; + return 0; +} + +static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt) +{ + struct cmd_chain_req *chain_req = req; + + chain_req->buf_addr = slot_addr; + chain_req->buf_size = size; + chain_req->count = cmd_cnt; +} + +static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt) +{ + struct cmd_chain_npu_req *npu_chain_req = req; + + npu_chain_req->flags = 0; + npu_chain_req->reserved = 0; + npu_chain_req->buf_addr = slot_addr; + npu_chain_req->buf_size = size; + npu_chain_req->count = cmd_cnt; +} + +static int +aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_execbuf_cf *cf_slot = slot; + u32 cmd_len; + void *cmd; + + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + if (*size < sizeof(*cf_slot) + cmd_len) return -EINVAL; - } - op = amdxdna_cmd_get_op(cmd_abo); - switch (op) { + cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (cf_slot->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + cf_slot->arg_cnt = cmd_len / sizeof(u32); + memcpy(cf_slot->args, cmd, cmd_len); + /* Accurate slot size to hint firmware to do necessary copy */ + *size = sizeof(*cf_slot) + cmd_len; + return 0; +} + +static int +aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_dpu *dpu_slot = slot; + struct amdxdna_cmd_start_npu *sn; + u32 cmd_len; + u32 arg_sz; + + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = cmd_len - sizeof(*sn); + if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) + return -EINVAL; + + if (*size < sizeof(*dpu_slot) + arg_sz) + return -EINVAL; + + dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (dpu_slot->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + dpu_slot->inst_buf_addr = sn->buffer; + dpu_slot->inst_size = sn->buffer_size; + dpu_slot->inst_prop_cnt = sn->prop_count; + dpu_slot->arg_cnt = arg_sz / sizeof(u32); + memcpy(dpu_slot->args, sn->prop_args, arg_sz); + + /* Accurate slot size to hint firmware to do necessary copy */ + *size = sizeof(*dpu_slot) + arg_sz; + return 0; +} + +static int aie2_cmdlist_unsupp(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + return -EOPNOTSUPP; +} + +static u32 aie2_get_chain_msg_op(u32 cmd_op) +{ + switch (cmd_op) { case ERT_START_CU: - if (unlikely(payload_len > sizeof(req.ebuf.payload))) - XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len); - req.ebuf.cu_idx = cu_idx; - memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload)); - msg.send_size = sizeof(req.ebuf); - msg.opcode = MSG_OP_EXECUTE_BUFFER_CF; - break; - case ERT_START_NPU: { - struct amdxdna_cmd_start_npu *sn = payload; - - if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload))) - XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len); - req.dpu.inst_buf_addr = sn->buffer; - req.dpu.inst_size = sn->buffer_size; - req.dpu.inst_prop_cnt = sn->prop_count; - req.dpu.cu_idx = cu_idx; - memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload)); - msg.send_size = sizeof(req.dpu); - msg.opcode = MSG_OP_EXEC_DPU; + return MSG_OP_CHAIN_EXEC_BUFFER_CF; + case ERT_START_NPU: + return MSG_OP_CHAIN_EXEC_DPU; + default: break; } - default: - XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op); + + return MSG_OP_MAX_OPCODE; +} + +static struct aie2_exec_msg_ops legacy_exec_message_ops = { + .init_cu_req = aie2_init_exec_cu_req, + .init_dpu_req = aie2_init_exec_dpu_req, + .init_chain_req = aie2_init_exec_chain_req, + .fill_cf_slot = aie2_cmdlist_fill_cf, + .fill_dpu_slot = aie2_cmdlist_fill_dpu, + .fill_preempt_slot = aie2_cmdlist_unsupp, + .fill_elf_slot = aie2_cmdlist_unsupp, + .get_chain_msg_op = aie2_get_chain_msg_op, +}; + +static int +aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_npu *npu_slot = slot; + u32 cmd_len; + void *cmd; + + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + if (*size < sizeof(*npu_slot) + cmd_len) return -EINVAL; - } - msg.handle = job; - msg.notify_cb = notify_cb; - msg.send_data = (u8 *)&req; - print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req, - 0x40, false); - ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); - if (ret) { - XDNA_ERR(xdna, "Send message failed"); - return ret; - } + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (npu_slot->cu_idx == INVALID_CU_IDX) + return -EINVAL; + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->type = EXEC_NPU_TYPE_NON_ELF; + npu_slot->arg_cnt = cmd_len / sizeof(u32); + memcpy(npu_slot->args, cmd, cmd_len); + + *size = sizeof(*npu_slot) + cmd_len; return 0; } static int -aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset, - struct amdxdna_gem_obj *abo, u32 *size) +aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) { - struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset; - int cu_idx = amdxdna_cmd_get_cu_idx(abo); - u32 payload_len; - void *payload; + struct cmd_chain_slot_npu *npu_slot = slot; + struct amdxdna_cmd_start_npu *sn; + u32 cmd_len; + u32 arg_sz; - if (cu_idx < 0) + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = cmd_len - sizeof(*sn); + if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE) return -EINVAL; - payload = amdxdna_cmd_get_payload(abo, &payload_len); - if (!payload) + if (*size < sizeof(*npu_slot) + arg_sz) return -EINVAL; - if (!slot_has_space(*buf, offset, payload_len)) - return -ENOSPC; + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (npu_slot->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF; + npu_slot->inst_buf_addr = sn->buffer; + npu_slot->inst_size = sn->buffer_size; + npu_slot->inst_prop_cnt = sn->prop_count; + npu_slot->arg_cnt = arg_sz / sizeof(u32); + memcpy(npu_slot->args, sn->prop_args, arg_sz); - buf->cu_idx = cu_idx; - buf->arg_cnt = payload_len / sizeof(u32); - memcpy(buf->args, payload, payload_len); - /* Accurate buf size to hint firmware to do necessary copy */ - *size = sizeof(*buf) + payload_len; + *size = sizeof(*npu_slot) + arg_sz; return 0; } static int -aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, - struct amdxdna_gem_obj *abo, u32 *size) +aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) { - struct cmd_chain_slot_dpu *buf = cmd_buf + offset; - int cu_idx = amdxdna_cmd_get_cu_idx(abo); - struct amdxdna_cmd_start_npu *sn; - u32 payload_len; - void *payload; + struct cmd_chain_slot_npu *npu_slot = slot; + struct amdxdna_cmd_preempt_data *pd; + u32 cmd_len; u32 arg_sz; - if (cu_idx < 0) + pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = cmd_len - sizeof(*pd); + if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) return -EINVAL; - payload = amdxdna_cmd_get_payload(abo, &payload_len); - if (!payload) + if (*size < sizeof(*npu_slot) + arg_sz) return -EINVAL; - sn = payload; - arg_sz = payload_len - sizeof(*sn); - if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) + + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; - if (!slot_has_space(*buf, offset, arg_sz)) - return -ENOSPC; + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->type = EXEC_NPU_TYPE_PREEMPT; + npu_slot->inst_buf_addr = pd->inst_buf; + npu_slot->save_buf_addr = pd->save_buf; + npu_slot->restore_buf_addr = pd->restore_buf; + npu_slot->inst_size = pd->inst_size; + npu_slot->save_size = pd->save_size; + npu_slot->restore_size = pd->restore_size; + npu_slot->inst_prop_cnt = pd->inst_prop_cnt; + npu_slot->arg_cnt = arg_sz / sizeof(u32); + memcpy(npu_slot->args, pd->prop_args, arg_sz); + + *size = sizeof(*npu_slot) + arg_sz; + return 0; +} + +static int +aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_npu *npu_slot = slot; + struct amdxdna_cmd_preempt_data *pd; + u32 cmd_len; + u32 arg_sz; + + pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = cmd_len - sizeof(*pd); + if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) + return -EINVAL; - buf->inst_buf_addr = sn->buffer; - buf->inst_size = sn->buffer_size; - buf->inst_prop_cnt = sn->prop_count; - buf->cu_idx = cu_idx; - buf->arg_cnt = arg_sz / sizeof(u32); - memcpy(buf->args, sn->prop_args, arg_sz); + if (*size < sizeof(*npu_slot) + arg_sz) + return -EINVAL; - /* Accurate buf size to hint firmware to do necessary copy */ - *size = sizeof(*buf) + arg_sz; + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->type = EXEC_NPU_TYPE_ELF; + npu_slot->inst_buf_addr = pd->inst_buf; + npu_slot->save_buf_addr = pd->save_buf; + npu_slot->restore_buf_addr = pd->restore_buf; + npu_slot->inst_size = pd->inst_size; + npu_slot->save_size = pd->save_size; + npu_slot->restore_size = pd->restore_size; + npu_slot->inst_prop_cnt = pd->inst_prop_cnt; + npu_slot->arg_cnt = 1; + npu_slot->args[0] = AIE2_EXEC_BUFFER_KERNEL_OP_TXN; + + *size = struct_size(npu_slot, args, npu_slot->arg_cnt); return 0; } -static int -aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset, - struct amdxdna_gem_obj *abo, u32 *size) +static u32 aie2_get_npu_chain_msg_op(u32 cmd_op) +{ + return MSG_OP_CHAIN_EXEC_NPU; +} + +static struct aie2_exec_msg_ops npu_exec_message_ops = { + .init_cu_req = aie2_init_exec_cu_req, + .init_dpu_req = aie2_init_exec_dpu_req, + .init_chain_req = aie2_init_npu_chain_req, + .fill_cf_slot = aie2_cmdlist_fill_npu_cf, + .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu, + .fill_preempt_slot = aie2_cmdlist_fill_npu_preempt, + .fill_elf_slot = aie2_cmdlist_fill_npu_elf, + .get_chain_msg_op = aie2_get_npu_chain_msg_op, +}; + +static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo, + size_t *size, u32 *msg_op) { - u32 this_op = amdxdna_cmd_get_op(abo); - void *cmd_buf = cmdbuf_abo->mem.kva; + struct amdxdna_dev *xdna = cmd_abo->client->xdna; int ret; + u32 op; - if (this_op != op) { - ret = -EINVAL; - goto done; - } + op = amdxdna_cmd_get_op(cmd_abo); switch (op) { case ERT_START_CU: - ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size); + ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op); + if (ret) { + XDNA_DBG(xdna, "Init CU req failed ret %d", ret); + return ret; + } break; case ERT_START_NPU: - ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size); + ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op); + if (ret) { + XDNA_DBG(xdna, "Init DPU req failed ret %d", ret); + return ret; + } + break; default: + XDNA_ERR(xdna, "Unsupported op %d", op); ret = -EOPNOTSUPP; + break; } -done: - if (ret) { - XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d", - op, ret); + return ret; +} + +static int +aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo, + size_t *size, u32 *cmd_op) +{ + struct amdxdna_dev *xdna = cmd_abo->client->xdna; + int ret; + u32 op; + + op = amdxdna_cmd_get_op(cmd_abo); + if (*cmd_op == ERT_INVALID_CMD) + *cmd_op = op; + else if (op != *cmd_op) + return -EINVAL; + + switch (op) { + case ERT_START_CU: + ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size); + break; + case ERT_START_NPU: + ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size); + break; + case ERT_START_NPU_PREEMPT: + if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT)) + return -EOPNOTSUPP; + ret = EXEC_MSG_OPS(xdna)->fill_preempt_slot(cmd_abo, slot, size); + break; + case ERT_START_NPU_PREEMPT_ELF: + if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT)) + return -EOPNOTSUPP; + ret = EXEC_MSG_OPS(xdna)->fill_elf_slot(cmd_abo, slot, size); + break; + default: + XDNA_INFO(xdna, "Unsupported op %d", op); + ret = -EOPNOTSUPP; + break; } + return ret; } +void aie2_msg_init(struct amdxdna_dev_hdl *ndev) +{ + if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND)) + ndev->exec_msg_ops = &npu_exec_message_ops; + else + ndev->exec_msg_ops = &legacy_exec_message_ops; +} + static inline struct amdxdna_gem_obj * aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) { @@ -616,29 +869,36 @@ aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) return job->hwctx->priv->cmd_buf[idx]; } -static void -aie2_cmdlist_prepare_request(struct cmd_chain_req *req, - struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt) +int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)) { - req->buf_addr = cmdbuf_abo->mem.dev_addr; - req->buf_size = size; - req->count = cnt; - drm_clflush_virt_range(cmdbuf_abo->mem.kva, size); - XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d", - req->buf_addr, size, cnt); -} + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; + struct xdna_mailbox_msg msg; + union exec_req req; + int ret; -static inline u32 -aie2_cmd_op_to_msg_op(u32 op) -{ - switch (op) { - case ERT_START_CU: - return MSG_OP_CHAIN_EXEC_BUFFER_CF; - case ERT_START_NPU: - return MSG_OP_CHAIN_EXEC_DPU; - default: - return MSG_OP_MAX_OPCODE; + if (!chann) + return -ENODEV; + + ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode); + if (ret) + return ret; + + msg.handle = job; + msg.notify_cb = notify_cb; + msg.send_data = (u8 *)&req; + print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req, + 0x40, false); + + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); + if (ret) { + XDNA_ERR(xdna, "Send message failed"); + return ret; } + + return 0; } int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, @@ -649,12 +909,13 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, struct mailbox_channel *chann = hwctx->priv->mbox_chann; struct amdxdna_client *client = hwctx->client; struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; + struct amdxdna_dev *xdna = client->xdna; struct amdxdna_cmd_chain *payload; struct xdna_mailbox_msg msg; - struct cmd_chain_req req; + union exec_chain_req req; u32 payload_len; u32 offset = 0; - u32 size; + size_t size; int ret; u32 op; u32 i; @@ -665,41 +926,42 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, payload_len < struct_size(payload, data, payload->command_count)) return -EINVAL; + op = ERT_INVALID_CMD; for (i = 0; i < payload->command_count; i++) { u32 boh = (u32)(payload->data[i]); struct amdxdna_gem_obj *abo; abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD); if (!abo) { - XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh); + XDNA_ERR(xdna, "Failed to find cmd BO %d", boh); return -ENOENT; } - /* All sub-cmd should have same op, use the first one. */ - if (i == 0) - op = amdxdna_cmd_get_op(abo); - - ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size); + size = cmdbuf_abo->mem.size - offset; + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset, + abo, &size, &op); amdxdna_gem_put_obj(abo); if (ret) - return -EINVAL; + return ret; offset += size; } + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op); + if (msg.opcode == MSG_OP_MAX_OPCODE) + return -EOPNOTSUPP; /* The offset is the accumulated total size of the cmd buffer */ - aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count); + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr, + offset, payload->command_count); + drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset); - msg.opcode = aie2_cmd_op_to_msg_op(op); - if (msg.opcode == MSG_OP_MAX_OPCODE) - return -EOPNOTSUPP; msg.handle = job; msg.notify_cb = notify_cb; msg.send_data = (u8 *)&req; msg.send_size = sizeof(req); ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); if (ret) { - XDNA_ERR(hwctx->client->xdna, "Send message failed"); + XDNA_ERR(xdna, "Send message failed"); return ret; } @@ -712,23 +974,27 @@ int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, { struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_dev *xdna = hwctx->client->xdna; struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; struct xdna_mailbox_msg msg; - struct cmd_chain_req req; - u32 size; + union exec_chain_req req; + u32 op = ERT_INVALID_CMD; + size_t size; int ret; - u32 op; - op = amdxdna_cmd_get_op(cmd_abo); - ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size); + size = cmdbuf_abo->mem.size; + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo, &size, &op); if (ret) return ret; - aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1); - - msg.opcode = aie2_cmd_op_to_msg_op(op); + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op); if (msg.opcode == MSG_OP_MAX_OPCODE) return -EOPNOTSUPP; + + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr, + size, 1); + drm_clflush_virt_range(cmdbuf_abo->mem.kva, size); + msg.handle = job; msg.notify_cb = notify_cb; msg.send_data = (u8 *)&req; @@ -753,7 +1019,7 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, int ret = 0; req.src_addr = 0; - req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr; + req.dst_addr = amdxdna_dev_bo_offset(abo); req.size = abo->mem.size; /* Device to Host */ @@ -777,3 +1043,32 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, return 0; } + +int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)) +{ + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]); + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct config_debug_bo_req req; + struct xdna_mailbox_msg msg; + + if (job->drv_cmd->opcode == ATTACH_DEBUG_BO) + req.config = DEBUG_BO_REGISTER; + else + req.config = DEBUG_BO_UNREGISTER; + + req.offset = amdxdna_dev_bo_offset(abo); + req.size = abo->mem.size; + + XDNA_DBG(xdna, "offset 0x%llx size 0x%llx config %d", + req.offset, req.size, req.config); + + msg.handle = job; + msg.notify_cb = notify_cb; + msg.send_data = (u8 *)&req; + msg.send_size = sizeof(req); + msg.opcode = MSG_OP_CONFIG_DEBUG_BO; + + return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); +} diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h index 6df9065b13f6..1c957a6298d3 100644 --- a/drivers/accel/amdxdna/aie2_msg_priv.h +++ b/drivers/accel/amdxdna/aie2_msg_priv.h @@ -9,7 +9,8 @@ enum aie2_msg_opcode { MSG_OP_CREATE_CONTEXT = 0x2, MSG_OP_DESTROY_CONTEXT = 0x3, - MSG_OP_SYNC_BO = 0x7, + MSG_OP_GET_TELEMETRY = 0x4, + MSG_OP_SYNC_BO = 0x7, MSG_OP_EXECUTE_BUFFER_CF = 0xC, MSG_OP_QUERY_COL_STATUS = 0xD, MSG_OP_QUERY_AIE_TILE_INFO = 0xE, @@ -18,6 +19,8 @@ enum aie2_msg_opcode { MSG_OP_CONFIG_CU = 0x11, MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12, MSG_OP_CHAIN_EXEC_DPU = 0x13, + MSG_OP_CONFIG_DEBUG_BO = 0x14, + MSG_OP_CHAIN_EXEC_NPU = 0x18, MSG_OP_MAX_XRT_OPCODE, MSG_OP_SUSPEND = 0x101, MSG_OP_RESUME = 0x102, @@ -135,6 +138,28 @@ struct destroy_ctx_resp { enum aie2_msg_status status; } __packed; +enum telemetry_type { + TELEMETRY_TYPE_DISABLED, + TELEMETRY_TYPE_HEALTH, + TELEMETRY_TYPE_ERROR_INFO, + TELEMETRY_TYPE_PROFILING, + TELEMETRY_TYPE_DEBUG, + MAX_TELEMETRY_TYPE +}; + +struct get_telemetry_req { + enum telemetry_type type; + __u64 buf_addr; + __u32 buf_size; +} __packed; + +struct get_telemetry_resp { + __u32 major; + __u32 minor; + __u32 size; + enum aie2_msg_status status; +} __packed; + struct execute_buffer_req { __u32 cu_idx; __u32 payload[19]; @@ -148,6 +173,18 @@ struct exec_dpu_req { __u32 payload[35]; } __packed; +enum exec_npu_type { + EXEC_NPU_TYPE_NON_ELF = 0x1, + EXEC_NPU_TYPE_PARTIAL_ELF = 0x2, + EXEC_NPU_TYPE_PREEMPT = 0x3, + EXEC_NPU_TYPE_ELF = 0x4, +}; + +union exec_req { + struct execute_buffer_req ebuf; + struct exec_dpu_req dpu_req; +}; + struct execute_buffer_resp { enum aie2_msg_status status; } __packed; @@ -319,9 +356,6 @@ struct async_event_msg_resp { } __packed; #define MAX_CHAIN_CMDBUF_SIZE SZ_4K -#define slot_has_space(slot, offset, payload_size) \ - (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \ - sizeof(typeof(slot))) struct cmd_chain_slot_execbuf_cf { __u32 cu_idx; @@ -339,12 +373,41 @@ struct cmd_chain_slot_dpu { __u32 args[] __counted_by(arg_cnt); }; +#define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32)) +#define AIE2_EXEC_BUFFER_KERNEL_OP_TXN 3 +struct cmd_chain_slot_npu { + enum exec_npu_type type; + u64 inst_buf_addr; + u64 save_buf_addr; + u64 restore_buf_addr; + u32 inst_size; + u32 save_size; + u32 restore_size; + u32 inst_prop_cnt; + u32 cu_idx; + u32 arg_cnt; + u32 args[] __counted_by(arg_cnt); +} __packed; + struct cmd_chain_req { __u64 buf_addr; __u32 buf_size; __u32 count; } __packed; +struct cmd_chain_npu_req { + u32 flags; + u32 reserved; + u64 buf_addr; + u32 buf_size; + u32 count; +} __packed; + +union exec_chain_req { + struct cmd_chain_npu_req npu_req; + struct cmd_chain_req req; +}; + struct cmd_chain_resp { enum aie2_msg_status status; __u32 fail_cmd_idx; @@ -365,4 +428,21 @@ struct sync_bo_req { struct sync_bo_resp { enum aie2_msg_status status; } __packed; + +#define DEBUG_BO_UNREGISTER 0 +#define DEBUG_BO_REGISTER 1 +struct config_debug_bo_req { + __u64 offset; + __u64 size; + /* + * config operations. + * DEBUG_BO_REGISTER: Register debug buffer + * DEBUG_BO_UNREGISTER: Unregister debug buffer + */ + __u32 config; +} __packed; + +struct config_debug_bo_resp { + enum aie2_msg_status status; +} __packed; #endif /* _AIE2_MSG_PRIV_H_ */ diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 87c425e3d2b9..ceef1c502e9e 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -25,6 +25,7 @@ #include "amdxdna_gem.h" #include "amdxdna_mailbox.h" #include "amdxdna_pci_drv.h" +#include "amdxdna_pm.h" static int aie2_max_col = XRS_MAX_COL; module_param(aie2_max_col, uint, 0600); @@ -54,6 +55,7 @@ struct mgmt_mbox_chann_info { static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor) { + const struct aie2_fw_feature_tbl *feature; struct amdxdna_dev *xdna = ndev->xdna; /* @@ -77,6 +79,17 @@ static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 f XDNA_ERR(xdna, "Firmware minor version smaller than supported"); return -EINVAL; } + + for (feature = ndev->priv->fw_feature_tbl; feature && feature->min_minor; + feature++) { + if (fw_minor < feature->min_minor) + continue; + if (feature->max_minor > 0 && fw_minor > feature->max_minor) + continue; + + set_bit(feature->feature, &ndev->feature_mask); + } + return 0; } @@ -170,6 +183,10 @@ int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, if (cfg->category != category) continue; + if (cfg->feature_mask && + bitmap_subset(&cfg->feature_mask, &ndev->feature_mask, AIE2_FEATURE_MAX)) + continue; + value = val ? *val : cfg->value; ret = aie2_set_runtime_cfg(ndev, cfg->type, value); if (ret) { @@ -223,15 +240,6 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev) return ret; } - if (!ndev->async_events) - return 0; - - ret = aie2_error_async_events_send(ndev); - if (ret) { - XDNA_ERR(ndev->xdna, "Send async events failed"); - return ret; - } - return 0; } @@ -257,6 +265,8 @@ static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev) return ret; } + ndev->total_col = min(aie2_max_col, ndev->metadata.cols); + return 0; } @@ -338,6 +348,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna) ndev->mbox = NULL; aie2_psp_stop(ndev->psp_hdl); aie2_smu_fini(ndev); + aie2_error_async_events_free(ndev); pci_disable_device(pdev); ndev->dev_status = AIE2_DEV_INIT; @@ -424,6 +435,18 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) goto destroy_mgmt_chann; } + ret = aie2_mgmt_fw_query(ndev); + if (ret) { + XDNA_ERR(xdna, "failed to query fw, ret %d", ret); + goto destroy_mgmt_chann; + } + + ret = aie2_error_async_events_alloc(ndev); + if (ret) { + XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret); + goto destroy_mgmt_chann; + } + ndev->dev_status = AIE2_DEV_START; return 0; @@ -459,7 +482,6 @@ static int aie2_hw_resume(struct amdxdna_dev *xdna) struct amdxdna_client *client; int ret; - guard(mutex)(&xdna->dev_lock); ret = aie2_hw_start(xdna); if (ret) { XDNA_ERR(xdna, "Start hardware failed, %d", ret); @@ -565,13 +587,6 @@ static int aie2_init(struct amdxdna_dev *xdna) goto release_fw; } - ret = aie2_mgmt_fw_query(ndev); - if (ret) { - XDNA_ERR(xdna, "Query firmware failed, ret %d", ret); - goto stop_hw; - } - ndev->total_col = min(aie2_max_col, ndev->metadata.cols); - xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1; for (i = 0; i < xrs_cfg.clk_list.num_levels; i++) xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk; @@ -587,30 +602,11 @@ static int aie2_init(struct amdxdna_dev *xdna) goto stop_hw; } - ret = aie2_error_async_events_alloc(ndev); - if (ret) { - XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret); - goto stop_hw; - } - - ret = aie2_error_async_events_send(ndev); - if (ret) { - XDNA_ERR(xdna, "Send async events failed, ret %d", ret); - goto async_event_free; - } - - /* Issue a command to make sure firmware handled async events */ - ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver); - if (ret) { - XDNA_ERR(xdna, "Re-query firmware version failed"); - goto async_event_free; - } - release_firmware(fw); + aie2_msg_init(ndev); + amdxdna_pm_init(xdna); return 0; -async_event_free: - aie2_error_async_events_free(ndev); stop_hw: aie2_hw_stop(xdna); release_fw: @@ -621,10 +617,8 @@ release_fw: static void aie2_fini(struct amdxdna_dev *xdna) { - struct amdxdna_dev_hdl *ndev = xdna->dev_handle; - + amdxdna_pm_fini(xdna); aie2_hw_stop(xdna); - aie2_error_async_events_free(ndev); } static int aie2_get_aie_status(struct amdxdna_client *client, @@ -845,7 +839,120 @@ static int aie2_get_hwctx_status(struct amdxdna_client *client, } args->buffer_size -= (u32)(array_args.buffer - args->buffer); - return ret; + return 0; +} + +static int aie2_query_resource_info(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_get_resource_info res_info; + const struct amdxdna_dev_priv *priv; + struct amdxdna_dev_hdl *ndev; + struct amdxdna_dev *xdna; + + xdna = client->xdna; + ndev = xdna->dev_handle; + priv = ndev->priv; + + res_info.npu_clk_max = priv->dpm_clk_tbl[ndev->max_dpm_level].hclk; + res_info.npu_tops_max = ndev->max_tops; + res_info.npu_task_max = priv->hwctx_limit; + res_info.npu_tops_curr = ndev->curr_tops; + res_info.npu_task_curr = ndev->hwctx_num; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &res_info, sizeof(res_info))) + return -EFAULT; + + return 0; +} + +static int aie2_fill_hwctx_map(struct amdxdna_hwctx *hwctx, void *arg) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + u32 *map = arg; + + if (hwctx->fw_ctx_id >= xdna->dev_handle->priv->hwctx_limit) { + XDNA_ERR(xdna, "Invalid fw ctx id %d/%d ", hwctx->fw_ctx_id, + xdna->dev_handle->priv->hwctx_limit); + return -EINVAL; + } + + map[hwctx->fw_ctx_id] = hwctx->id; + return 0; +} + +static int aie2_get_telemetry(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_telemetry_header *header __free(kfree) = NULL; + u32 telemetry_data_sz, header_sz, elem_num; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_client *tmp_client; + int ret; + + elem_num = xdna->dev_handle->priv->hwctx_limit; + header_sz = struct_size(header, map, elem_num); + if (args->buffer_size <= header_sz) { + XDNA_ERR(xdna, "Invalid buffer size"); + return -EINVAL; + } + + telemetry_data_sz = args->buffer_size - header_sz; + if (telemetry_data_sz > SZ_4M) { + XDNA_ERR(xdna, "Buffer size is too big, %d", telemetry_data_sz); + return -EINVAL; + } + + header = kzalloc(header_sz, GFP_KERNEL); + if (!header) + return -ENOMEM; + + if (copy_from_user(header, u64_to_user_ptr(args->buffer), sizeof(*header))) { + XDNA_ERR(xdna, "Failed to copy telemetry header from user"); + return -EFAULT; + } + + header->map_num_elements = elem_num; + list_for_each_entry(tmp_client, &xdna->client_list, node) { + ret = amdxdna_hwctx_walk(tmp_client, &header->map, + aie2_fill_hwctx_map); + if (ret) + return ret; + } + + ret = aie2_query_telemetry(xdna->dev_handle, + u64_to_user_ptr(args->buffer + header_sz), + telemetry_data_sz, header); + if (ret) { + XDNA_ERR(xdna, "Query telemetry failed ret %d", ret); + return ret; + } + + if (copy_to_user(u64_to_user_ptr(args->buffer), header, header_sz)) { + XDNA_ERR(xdna, "Copy header failed"); + return -EFAULT; + } + + return 0; +} + +static int aie2_get_preempt_state(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_attribute_state state = {}; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + + ndev = xdna->dev_handle; + if (args->param == DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE) + state.state = ndev->force_preempt_enabled; + else if (args->param == DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE) + state.state = ndev->frame_boundary_preempt; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &state, sizeof(state))) + return -EFAULT; + + return 0; } static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args) @@ -856,6 +963,10 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i if (!drm_dev_enter(&xdna->ddev, &idx)) return -ENODEV; + ret = amdxdna_pm_resume_get(xdna); + if (ret) + goto dev_exit; + switch (args->param) { case DRM_AMDXDNA_QUERY_AIE_STATUS: ret = aie2_get_aie_status(client, args); @@ -878,12 +989,25 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i case DRM_AMDXDNA_GET_POWER_MODE: ret = aie2_get_power_mode(client, args); break; + case DRM_AMDXDNA_QUERY_TELEMETRY: + ret = aie2_get_telemetry(client, args); + break; + case DRM_AMDXDNA_QUERY_RESOURCE_INFO: + ret = aie2_query_resource_info(client, args); + break; + case DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE: + case DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE: + ret = aie2_get_preempt_state(client, args); + break; default: XDNA_ERR(xdna, "Not supported request parameter %u", args->param); ret = -EOPNOTSUPP; } + + amdxdna_pm_suspend_put(xdna); XDNA_DBG(xdna, "Got param %d", args->param); +dev_exit: drm_dev_exit(idx); return ret; } @@ -898,6 +1022,12 @@ static int aie2_query_ctx_status_array(struct amdxdna_client *client, drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + if (args->element_size > SZ_4K || args->num_element > SZ_1K) { + XDNA_DBG(xdna, "Invalid element size %d or number of element %d", + args->element_size, args->num_element); + return -EINVAL; + } + array_args.element_size = min(args->element_size, sizeof(struct amdxdna_drm_hwctx_entry)); array_args.buffer = args->buffer; @@ -914,7 +1044,7 @@ static int aie2_query_ctx_status_array(struct amdxdna_client *client, args->num_element = (u32)((array_args.buffer - args->buffer) / args->element_size); - return ret; + return 0; } static int aie2_get_array(struct amdxdna_client *client, @@ -926,16 +1056,26 @@ static int aie2_get_array(struct amdxdna_client *client, if (!drm_dev_enter(&xdna->ddev, &idx)) return -ENODEV; + ret = amdxdna_pm_resume_get(xdna); + if (ret) + goto dev_exit; + switch (args->param) { case DRM_AMDXDNA_HW_CONTEXT_ALL: ret = aie2_query_ctx_status_array(client, args); break; + case DRM_AMDXDNA_HW_LAST_ASYNC_ERR: + ret = aie2_get_array_async_error(xdna->dev_handle, args); + break; default: XDNA_ERR(xdna, "Not supported request parameter %u", args->param); ret = -EOPNOTSUPP; } + + amdxdna_pm_suspend_put(xdna); XDNA_DBG(xdna, "Got param %d", args->param); +dev_exit: drm_dev_exit(idx); return ret; } @@ -965,6 +1105,38 @@ static int aie2_set_power_mode(struct amdxdna_client *client, return aie2_pm_set_mode(xdna->dev_handle, power_mode); } +static int aie2_set_preempt_state(struct amdxdna_client *client, + struct amdxdna_drm_set_state *args) +{ + struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle; + struct amdxdna_drm_attribute_state state; + u32 val; + int ret; + + if (copy_from_user(&state, u64_to_user_ptr(args->buffer), sizeof(state))) + return -EFAULT; + + if (state.state > 1) + return -EINVAL; + + if (XDNA_MBZ_DBG(client->xdna, state.pad, sizeof(state.pad))) + return -EINVAL; + + if (args->param == DRM_AMDXDNA_SET_FORCE_PREEMPT) { + ndev->force_preempt_enabled = state.state; + } else if (args->param == DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT) { + val = state.state; + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT, + &val); + if (ret) + return ret; + + ndev->frame_boundary_preempt = state.state; + } + + return 0; +} + static int aie2_set_state(struct amdxdna_client *client, struct amdxdna_drm_set_state *args) { @@ -974,16 +1146,26 @@ static int aie2_set_state(struct amdxdna_client *client, if (!drm_dev_enter(&xdna->ddev, &idx)) return -ENODEV; + ret = amdxdna_pm_resume_get(xdna); + if (ret) + goto dev_exit; + switch (args->param) { case DRM_AMDXDNA_SET_POWER_MODE: ret = aie2_set_power_mode(client, args); break; + case DRM_AMDXDNA_SET_FORCE_PREEMPT: + case DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT: + ret = aie2_set_preempt_state(client, args); + break; default: XDNA_ERR(xdna, "Not supported request parameter %u", args->param); ret = -EOPNOTSUPP; break; } + amdxdna_pm_suspend_put(xdna); +dev_exit: drm_dev_exit(idx); return ret; } @@ -998,6 +1180,7 @@ const struct amdxdna_dev_ops aie2_ops = { .hwctx_init = aie2_hwctx_init, .hwctx_fini = aie2_hwctx_fini, .hwctx_config = aie2_hwctx_config, + .hwctx_sync_debug_bo = aie2_hwctx_sync_debug_bo, .cmd_submit = aie2_cmd_submit, .hmm_invalidate = aie2_hmm_invalidate, .get_array = aie2_get_array, diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index 91a8e948f82a..a5f9c42155d1 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -110,12 +110,15 @@ struct aie_metadata { enum rt_config_category { AIE2_RT_CFG_INIT, AIE2_RT_CFG_CLK_GATING, + AIE2_RT_CFG_FORCE_PREEMPT, + AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT, }; struct rt_config { u32 type; u32 value; u32 category; + unsigned long feature_mask; }; struct dpm_clk_freq { @@ -156,6 +159,19 @@ enum aie2_dev_status { AIE2_DEV_START, }; +struct aie2_exec_msg_ops { + int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req, + size_t *size, u32 *msg_op); + int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req, + size_t *size, u32 *msg_op); + void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt); + int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); + int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); + int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); + int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); + u32 (*get_chain_msg_op)(u32 cmd_op); +}; + struct amdxdna_dev_hdl { struct amdxdna_dev *xdna; const struct amdxdna_dev_priv *priv; @@ -173,6 +189,8 @@ struct amdxdna_dev_hdl { u32 total_col; struct aie_version version; struct aie_metadata metadata; + unsigned long feature_mask; + struct aie2_exec_msg_ops *exec_msg_ops; /* power management and clock*/ enum amdxdna_power_mode_type pw_mode; @@ -182,6 +200,10 @@ struct amdxdna_dev_hdl { u32 clk_gating; u32 npuclk_freq; u32 hclk_freq; + u32 max_tops; + u32 curr_tops; + u32 force_preempt_enabled; + u32 frame_boundary_preempt; /* Mailbox and the management channel */ struct mailbox *mbox; @@ -190,6 +212,8 @@ struct amdxdna_dev_hdl { enum aie2_dev_status dev_status; u32 hwctx_num; + + struct amdxdna_async_error last_async_err; }; #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \ @@ -204,12 +228,27 @@ struct aie2_hw_ops { int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level); }; +enum aie2_fw_feature { + AIE2_NPU_COMMAND, + AIE2_PREEMPT, + AIE2_FEATURE_MAX +}; + +struct aie2_fw_feature_tbl { + enum aie2_fw_feature feature; + u32 max_minor; + u32 min_minor; +}; + +#define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask) + struct amdxdna_dev_priv { const char *fw_path; u64 protocol_major; u64 protocol_minor; const struct rt_config *rt_config; const struct dpm_clk_freq *dpm_clk_tbl; + const struct aie2_fw_feature_tbl *fw_feature_tbl; #define COL_ALIGN_NONE 0 #define COL_ALIGN_NATURE 1 @@ -217,6 +256,7 @@ struct amdxdna_dev_priv { u32 mbox_dev_addr; /* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */ u32 mbox_size; + u32 hwctx_limit; u32 sram_dev_addr; struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX]; struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS]; @@ -234,6 +274,7 @@ extern const struct dpm_clk_freq npu1_dpm_clk_table[]; extern const struct dpm_clk_freq npu4_dpm_clk_table[]; extern const struct rt_config npu1_default_rt_cfg[]; extern const struct rt_config npu4_default_rt_cfg[]; +extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[]; /* aie2_smu.c */ int aie2_smu_init(struct amdxdna_dev_hdl *ndev); @@ -253,10 +294,12 @@ void aie2_psp_stop(struct psp_device *psp); /* aie2_error.c */ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev); void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev); -int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev); int aie2_error_async_msg_thread(void *data); +int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, + struct amdxdna_drm_get_array *args); /* aie2_message.c */ +void aie2_msg_init(struct amdxdna_dev_hdl *ndev); int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev); int aie2_resume_fw(struct amdxdna_dev_hdl *ndev); int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value); @@ -270,9 +313,13 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size); int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled); +int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev, + char __user *buf, u32 size, + struct amdxdna_drm_query_telemetry_header *header); int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size, void *handle, int (*cb)(void*, void __iomem *, size_t)); -int aie2_config_cu(struct amdxdna_hwctx *hwctx); +int aie2_config_cu(struct amdxdna_hwctx *hwctx, + int (*notify_cb)(void *, void __iomem *, size_t)); int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, int (*notify_cb)(void *, void __iomem *, size_t)); int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, @@ -283,11 +330,14 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, int (*notify_cb)(void *, void __iomem *, size_t)); int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, int (*notify_cb)(void *, void __iomem *, size_t)); +int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)); /* aie2_hwctx.c */ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx); void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx); int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); +int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl); void aie2_hwctx_suspend(struct amdxdna_client *client); int aie2_hwctx_resume(struct amdxdna_client *client); int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c index d303701b0ded..bd94ee96c2bc 100644 --- a/drivers/accel/amdxdna/aie2_smu.c +++ b/drivers/accel/amdxdna/aie2_smu.c @@ -11,6 +11,7 @@ #include "aie2_pci.h" #include "amdxdna_pci_drv.h" +#include "amdxdna_pm.h" #define SMU_RESULT_OK 1 @@ -22,6 +23,13 @@ #define AIE2_SMU_SET_SOFT_DPMLEVEL 0x7 #define AIE2_SMU_SET_HARD_DPMLEVEL 0x8 +#define NPU4_DPM_TOPS(ndev, dpm_level) \ +({ \ + typeof(ndev) _ndev = ndev; \ + (4096 * (_ndev)->total_col * \ + (_ndev)->priv->dpm_clk_tbl[dpm_level].hclk / 1000000); \ +}) + static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg, u32 *out) { @@ -59,12 +67,16 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) u32 freq; int ret; + ret = amdxdna_pm_resume_get(ndev->xdna); + if (ret) + return ret; + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq); if (ret) { XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n", ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret); - return ret; + goto suspend_put; } ndev->npuclk_freq = freq; @@ -73,49 +85,78 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) if (ret) { XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n", ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret); - return ret; + goto suspend_put; } + + amdxdna_pm_suspend_put(ndev->xdna); ndev->hclk_freq = freq; ndev->dpm_level = dpm_level; + ndev->max_tops = 2 * ndev->total_col; + ndev->curr_tops = ndev->max_tops * freq / 1028; XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n", ndev->npuclk_freq, ndev->hclk_freq); return 0; + +suspend_put: + amdxdna_pm_suspend_put(ndev->xdna); + return ret; } int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) { int ret; + ret = amdxdna_pm_resume_get(ndev->xdna); + if (ret) + return ret; + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL); if (ret) { XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ", dpm_level, ret); - return ret; + goto suspend_put; } ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL); if (ret) { XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d", dpm_level, ret); - return ret; + goto suspend_put; } + amdxdna_pm_suspend_put(ndev->xdna); ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk; ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk; ndev->dpm_level = dpm_level; + ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->max_dpm_level); + ndev->curr_tops = NPU4_DPM_TOPS(ndev, dpm_level); XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n", ndev->npuclk_freq, ndev->hclk_freq); return 0; + +suspend_put: + amdxdna_pm_suspend_put(ndev->xdna); + return ret; } int aie2_smu_init(struct amdxdna_dev_hdl *ndev) { int ret; + /* + * Failing to set power off indicates an unrecoverable hardware or + * firmware error. + */ + ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL); + if (ret) { + XDNA_ERR(ndev->xdna, "Access power failed, ret %d", ret); + return ret; + } + ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL); if (ret) { XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret); diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index 4bfe4ef20550..d17aef89a0ad 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -113,14 +113,14 @@ void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size) return &cmd->data[num_masks]; } -int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) +u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) { struct amdxdna_cmd *cmd = abo->mem.kva; u32 num_masks, i; u32 *cu_mask; if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) - return -1; + return INVALID_CU_IDX; num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header); cu_mask = cmd->data; @@ -129,7 +129,7 @@ int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) return ffs(cu_mask[i]) - 1; } - return -1; + return INVALID_CU_IDX; } /* @@ -161,19 +161,14 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr if (args->ext || args->ext_flags) return -EINVAL; - if (!drm_dev_enter(dev, &idx)) - return -ENODEV; - hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL); - if (!hwctx) { - ret = -ENOMEM; - goto exit; - } + if (!hwctx) + return -ENOMEM; if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) { XDNA_ERR(xdna, "Access QoS info failed"); - ret = -EFAULT; - goto free_hwctx; + kfree(hwctx); + return -EFAULT; } hwctx->client = client; @@ -181,30 +176,36 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr hwctx->num_tiles = args->num_tiles; hwctx->mem_size = args->mem_size; hwctx->max_opc = args->max_opc; - ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx, - XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID), - &client->next_hwctxid, GFP_KERNEL); - if (ret < 0) { - XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret); + + guard(mutex)(&xdna->dev_lock); + + if (!drm_dev_enter(dev, &idx)) { + ret = -ENODEV; goto free_hwctx; } - hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id); + ret = xdna->dev_info->ops->hwctx_init(hwctx); + if (ret) { + XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret); + goto dev_exit; + } + + hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->fw_ctx_id); if (!hwctx->name) { ret = -ENOMEM; - goto rm_id; + goto fini_hwctx; } - mutex_lock(&xdna->dev_lock); - ret = xdna->dev_info->ops->hwctx_init(hwctx); - if (ret) { - mutex_unlock(&xdna->dev_lock); - XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret); + ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx, + XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID), + &client->next_hwctxid, GFP_KERNEL); + if (ret < 0) { + XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret); goto free_name; } + args->handle = hwctx->id; args->syncobj_handle = hwctx->syncobj_hdl; - mutex_unlock(&xdna->dev_lock); atomic64_set(&hwctx->job_submit_cnt, 0); atomic64_set(&hwctx->job_free_cnt, 0); @@ -214,12 +215,12 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr free_name: kfree(hwctx->name); -rm_id: - xa_erase(&client->hwctx_xa, hwctx->id); +fini_hwctx: + xdna->dev_info->ops->hwctx_fini(hwctx); +dev_exit: + drm_dev_exit(idx); free_hwctx: kfree(hwctx); -exit: - drm_dev_exit(idx); return ret; } @@ -327,6 +328,38 @@ unlock_srcu: return ret; } +int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl) +{ + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_hwctx *hwctx; + struct amdxdna_gem_obj *abo; + struct drm_gem_object *gobj; + int ret, idx; + + if (!xdna->dev_info->ops->hwctx_sync_debug_bo) + return -EOPNOTSUPP; + + gobj = drm_gem_object_lookup(client->filp, debug_bo_hdl); + if (!gobj) + return -EINVAL; + + abo = to_xdna_obj(gobj); + guard(mutex)(&xdna->dev_lock); + idx = srcu_read_lock(&client->hwctx_srcu); + hwctx = xa_load(&client->hwctx_xa, abo->assigned_hwctx); + if (!hwctx) { + ret = -EINVAL; + goto unlock_srcu; + } + + ret = xdna->dev_info->ops->hwctx_sync_debug_bo(hwctx, debug_bo_hdl); + +unlock_srcu: + srcu_read_unlock(&client->hwctx_srcu, idx); + drm_gem_object_put(gobj); + return ret; +} + static void amdxdna_arg_bos_put(struct amdxdna_sched_job *job) { @@ -389,9 +422,11 @@ void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job) trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release"); amdxdna_arg_bos_put(job); amdxdna_gem_put_obj(job->cmd_bo); + dma_fence_put(job->fence); } int amdxdna_cmd_submit(struct amdxdna_client *client, + struct amdxdna_drv_cmd *drv_cmd, u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt, u32 hwctx_hdl, u64 *seq) { @@ -405,6 +440,8 @@ int amdxdna_cmd_submit(struct amdxdna_client *client, if (!job) return -ENOMEM; + job->drv_cmd = drv_cmd; + if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) { job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD); if (!job->cmd_bo) { @@ -412,8 +449,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client, ret = -EINVAL; goto free_job; } - } else { - job->cmd_bo = NULL; } ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt); @@ -431,11 +466,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client, goto unlock_srcu; } - if (hwctx->status != HWCTX_STAT_READY) { - XDNA_ERR(xdna, "HW Context is not ready"); - ret = -EINVAL; - goto unlock_srcu; - } job->hwctx = hwctx; job->mm = current->mm; @@ -512,7 +542,7 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client, } } - ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls, + ret = amdxdna_cmd_submit(client, NULL, cmd_bo_hdl, arg_bo_hdls, args->arg_count, args->hwctx, &args->seq); if (ret) XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret); diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h index 7cd7a55936f0..b6151244d64f 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.h +++ b/drivers/accel/amdxdna/amdxdna_ctx.h @@ -13,9 +13,12 @@ struct amdxdna_hwctx_priv; enum ert_cmd_opcode { - ERT_START_CU = 0, - ERT_CMD_CHAIN = 19, - ERT_START_NPU = 20, + ERT_START_CU = 0, + ERT_CMD_CHAIN = 19, + ERT_START_NPU = 20, + ERT_START_NPU_PREEMPT = 21, + ERT_START_NPU_PREEMPT_ELF = 22, + ERT_INVALID_CMD = ~0U, }; enum ert_cmd_state { @@ -54,6 +57,21 @@ struct amdxdna_cmd_chain { u64 data[] __counted_by(command_count); }; +/* + * Interpretation of the beginning of data payload for ERT_START_NPU_PREEMPT in + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args. + */ +struct amdxdna_cmd_preempt_data { + u64 inst_buf; /* instruction buffer address */ + u64 save_buf; /* save buffer address */ + u64 restore_buf; /* restore buffer address */ + u32 inst_size; /* size of instruction buffer in bytes */ + u32 save_size; /* size of save buffer in bytes */ + u32 restore_size; /* size of restore buffer in bytes */ + u32 inst_prop_cnt; /* properties count */ + u32 prop_args[]; /* properties and regular kernel arguments */ +}; + /* Exec buffer command header format */ #define AMDXDNA_CMD_STATE GENMASK(3, 0) #define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10) @@ -64,6 +82,8 @@ struct amdxdna_cmd { u32 data[]; }; +#define INVALID_CU_IDX (~0U) + struct amdxdna_hwctx { struct amdxdna_client *client; struct amdxdna_hwctx_priv *priv; @@ -95,6 +115,17 @@ struct amdxdna_hwctx { #define drm_job_to_xdna_job(j) \ container_of(j, struct amdxdna_sched_job, base) +enum amdxdna_job_opcode { + SYNC_DEBUG_BO, + ATTACH_DEBUG_BO, + DETACH_DEBUG_BO, +}; + +struct amdxdna_drv_cmd { + enum amdxdna_job_opcode opcode; + u32 result; +}; + struct amdxdna_sched_job { struct drm_sched_job base; struct kref refcnt; @@ -105,7 +136,9 @@ struct amdxdna_sched_job { /* user can wait on this fence */ struct dma_fence *out_fence; bool job_done; + bool job_timeout; u64 seq; + struct amdxdna_drv_cmd *drv_cmd; struct amdxdna_gem_obj *cmd_bo; size_t bo_cnt; struct drm_gem_object *bos[] __counted_by(bo_cnt); @@ -137,15 +170,17 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo) } void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size); -int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo); +u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo); void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job); void amdxdna_hwctx_remove_all(struct amdxdna_client *client); int amdxdna_hwctx_walk(struct amdxdna_client *client, void *arg, int (*walk)(struct amdxdna_hwctx *hwctx, void *arg)); +int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl); int amdxdna_cmd_submit(struct amdxdna_client *client, - u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt, + struct amdxdna_drv_cmd *drv_cmd, u32 cmd_bo_hdls, + u32 *arg_bo_hdls, u32 arg_bo_cnt, u32 hwctx_hdl, u64 *seq); int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl, diff --git a/drivers/accel/amdxdna/amdxdna_error.h b/drivers/accel/amdxdna/amdxdna_error.h new file mode 100644 index 000000000000..c51de86ec12b --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_error.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025, Advanced Micro Devices, Inc. + */ + +#ifndef _AMDXDNA_ERROR_H_ +#define _AMDXDNA_ERROR_H_ + +#include <linux/bitfield.h> +#include <linux/bits.h> + +#define AMDXDNA_ERR_DRV_AIE 4 +#define AMDXDNA_ERR_SEV_CRITICAL 3 +#define AMDXDNA_ERR_CLASS_AIE 2 + +#define AMDXDNA_ERR_NUM_MASK GENMASK_U64(15, 0) +#define AMDXDNA_ERR_DRV_MASK GENMASK_U64(23, 16) +#define AMDXDNA_ERR_SEV_MASK GENMASK_U64(31, 24) +#define AMDXDNA_ERR_MOD_MASK GENMASK_U64(39, 32) +#define AMDXDNA_ERR_CLASS_MASK GENMASK_U64(47, 40) + +enum amdxdna_error_num { + AMDXDNA_ERROR_NUM_AIE_SATURATION = 3, + AMDXDNA_ERROR_NUM_AIE_FP, + AMDXDNA_ERROR_NUM_AIE_STREAM, + AMDXDNA_ERROR_NUM_AIE_ACCESS, + AMDXDNA_ERROR_NUM_AIE_BUS, + AMDXDNA_ERROR_NUM_AIE_INSTRUCTION, + AMDXDNA_ERROR_NUM_AIE_ECC, + AMDXDNA_ERROR_NUM_AIE_LOCK, + AMDXDNA_ERROR_NUM_AIE_DMA, + AMDXDNA_ERROR_NUM_AIE_MEM_PARITY, + AMDXDNA_ERROR_NUM_UNKNOWN = 15, +}; + +enum amdxdna_error_module { + AMDXDNA_ERROR_MODULE_AIE_CORE = 3, + AMDXDNA_ERROR_MODULE_AIE_MEMORY, + AMDXDNA_ERROR_MODULE_AIE_SHIM, + AMDXDNA_ERROR_MODULE_AIE_NOC, + AMDXDNA_ERROR_MODULE_AIE_PL, + AMDXDNA_ERROR_MODULE_UNKNOWN = 8, +}; + +#define AMDXDNA_ERROR_ENCODE(err_num, err_mod) \ + (FIELD_PREP(AMDXDNA_ERR_NUM_MASK, err_num) | \ + FIELD_PREP_CONST(AMDXDNA_ERR_DRV_MASK, AMDXDNA_ERR_DRV_AIE) | \ + FIELD_PREP_CONST(AMDXDNA_ERR_SEV_MASK, AMDXDNA_ERR_SEV_CRITICAL) | \ + FIELD_PREP(AMDXDNA_ERR_MOD_MASK, err_mod) | \ + FIELD_PREP_CONST(AMDXDNA_ERR_CLASS_MASK, AMDXDNA_ERR_CLASS_AIE)) + +#define AMDXDNA_EXTRA_ERR_COL_MASK GENMASK_U64(7, 0) +#define AMDXDNA_EXTRA_ERR_ROW_MASK GENMASK_U64(15, 8) + +#define AMDXDNA_EXTRA_ERR_ENCODE(row, col) \ + (FIELD_PREP(AMDXDNA_EXTRA_ERR_COL_MASK, col) | \ + FIELD_PREP(AMDXDNA_EXTRA_ERR_ROW_MASK, row)) + +#endif /* _AMDXDNA_ERROR_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c index d407a36eb412..dfa916eeb2d9 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.c +++ b/drivers/accel/amdxdna/amdxdna_gem.c @@ -8,6 +8,7 @@ #include <drm/drm_device.h> #include <drm/drm_gem.h> #include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_print.h> #include <drm/gpu_scheduler.h> #include <linux/dma-buf.h> #include <linux/dma-direct.h> @@ -392,35 +393,33 @@ static const struct dma_buf_ops amdxdna_dmabuf_ops = { .vunmap = drm_gem_dmabuf_vunmap, }; -static int amdxdna_gem_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map) +static int amdxdna_gem_obj_vmap(struct amdxdna_gem_obj *abo, void **vaddr) { - struct amdxdna_gem_obj *abo = to_xdna_obj(obj); - - iosys_map_clear(map); - - dma_resv_assert_held(obj->resv); + struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL); + int ret; if (is_import_bo(abo)) - dma_buf_vmap(abo->dma_buf, map); + ret = dma_buf_vmap_unlocked(abo->dma_buf, &map); else - drm_gem_shmem_object_vmap(obj, map); + ret = drm_gem_vmap(to_gobj(abo), &map); - if (!map->vaddr) - return -ENOMEM; - - return 0; + *vaddr = map.vaddr; + return ret; } -static void amdxdna_gem_obj_vunmap(struct drm_gem_object *obj, struct iosys_map *map) +static void amdxdna_gem_obj_vunmap(struct amdxdna_gem_obj *abo) { - struct amdxdna_gem_obj *abo = to_xdna_obj(obj); + struct iosys_map map; + + if (!abo->mem.kva) + return; - dma_resv_assert_held(obj->resv); + iosys_map_set_vaddr(&map, abo->mem.kva); if (is_import_bo(abo)) - dma_buf_vunmap(abo->dma_buf, map); + dma_buf_vunmap_unlocked(abo->dma_buf, &map); else - drm_gem_shmem_object_vunmap(obj, map); + drm_gem_vunmap(to_gobj(abo), &map); } static struct dma_buf *amdxdna_gem_prime_export(struct drm_gem_object *gobj, int flags) @@ -455,7 +454,6 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj) { struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev); struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); - struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva); XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr); @@ -468,7 +466,7 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj) if (abo->type == AMDXDNA_BO_DEV_HEAP) drm_mm_takedown(&abo->mm); - drm_gem_vunmap(gobj, &map); + amdxdna_gem_obj_vunmap(abo); mutex_destroy(&abo->lock); if (is_import_bo(abo)) { @@ -489,8 +487,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = { .pin = drm_gem_shmem_object_pin, .unpin = drm_gem_shmem_object_unpin, .get_sg_table = drm_gem_shmem_object_get_sg_table, - .vmap = amdxdna_gem_obj_vmap, - .vunmap = amdxdna_gem_obj_vunmap, + .vmap = drm_gem_shmem_object_vmap, + .vunmap = drm_gem_shmem_object_vunmap, .mmap = amdxdna_gem_obj_mmap, .vm_ops = &drm_gem_shmem_vm_ops, .export = amdxdna_gem_prime_export, @@ -663,7 +661,6 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev, struct drm_file *filp) { struct amdxdna_client *client = filp->driver_priv; - struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL); struct amdxdna_dev *xdna = to_xdna_dev(dev); struct amdxdna_gem_obj *abo; int ret; @@ -692,12 +689,11 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev, abo->mem.dev_addr = client->xdna->dev_info->dev_mem_base; drm_mm_init(&abo->mm, abo->mem.dev_addr, abo->mem.size); - ret = drm_gem_vmap(to_gobj(abo), &map); + ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva); if (ret) { XDNA_ERR(xdna, "Vmap heap bo failed, ret %d", ret); goto release_obj; } - abo->mem.kva = map.vaddr; client->dev_heap = abo; drm_gem_object_get(to_gobj(abo)); @@ -748,7 +744,6 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev, struct amdxdna_drm_create_bo *args, struct drm_file *filp) { - struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL); struct amdxdna_dev *xdna = to_xdna_dev(dev); struct amdxdna_gem_obj *abo; int ret; @@ -770,12 +765,11 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev, abo->type = AMDXDNA_BO_CMD; abo->client = filp->driver_priv; - ret = drm_gem_vmap(to_gobj(abo), &map); + ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva); if (ret) { XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret); goto release_obj; } - abo->mem.kva = map.vaddr; return abo; @@ -969,6 +963,9 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, XDNA_DBG(xdna, "Sync bo %d offset 0x%llx, size 0x%llx\n", args->handle, args->offset, args->size); + if (args->direction == SYNC_DIRECT_FROM_DEVICE) + ret = amdxdna_hwctx_sync_debug_bo(abo->client, args->handle); + put_obj: drm_gem_object_put(gobj); return ret; diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h index ae29db94a9d3..f79fc7f3c93b 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.h +++ b/drivers/accel/amdxdna/amdxdna_gem.h @@ -7,6 +7,7 @@ #define _AMDXDNA_GEM_H_ #include <linux/hmm.h> +#include "amdxdna_pci_drv.h" struct amdxdna_umap { struct vm_area_struct *vma; @@ -62,6 +63,11 @@ static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo) drm_gem_object_put(to_gobj(abo)); } +static inline u64 amdxdna_dev_bo_offset(struct amdxdna_gem_obj *abo) +{ + return abo->mem.dev_addr - abo->client->dev_heap->mem.dev_addr; +} + void amdxdna_umap_put(struct amdxdna_umap *mapp); struct drm_gem_object * diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c index da1ac89bb78f..858df97cd3fb 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox.c +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c @@ -194,7 +194,8 @@ static void mailbox_release_msg(struct mailbox_channel *mb_chann, { MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x", mb_msg->pkg.header.id, mb_msg->pkg.header.opcode); - mb_msg->notify_cb(mb_msg->handle, NULL, 0); + if (mb_msg->notify_cb) + mb_msg->notify_cb(mb_msg->handle, NULL, 0); kfree(mb_msg); } @@ -248,7 +249,7 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade { struct mailbox_msg *mb_msg; int msg_id; - int ret; + int ret = 0; msg_id = header->id; if (!mailbox_validate_msgid(msg_id)) { @@ -265,9 +266,11 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x", header->opcode, header->total_size, header->id); - ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size); - if (unlikely(ret)) - MB_ERR(mb_chann, "Message callback ret %d", ret); + if (mb_msg->notify_cb) { + ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size); + if (unlikely(ret)) + MB_ERR(mb_chann, "Message callback ret %d", ret); + } kfree(mb_msg); return ret; @@ -513,6 +516,7 @@ xdna_mailbox_create_channel(struct mailbox *mb, } mb_chann->bad_state = false; + mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0); MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq); return mb_chann; diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h index 710ff8873d61..556c712cad0a 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h +++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h @@ -16,16 +16,18 @@ struct xdna_notify { u32 *data; size_t size; int error; + u32 *status; }; -#define DECLARE_XDNA_MSG_COMMON(name, op, status) \ +#define DECLARE_XDNA_MSG_COMMON(name, op, s) \ struct name##_req req = { 0 }; \ - struct name##_resp resp = { status }; \ + struct name##_resp resp = { .status = s }; \ struct xdna_notify hdl = { \ .error = 0, \ .data = (u32 *)&resp, \ .size = sizeof(resp), \ .comp = COMPLETION_INITIALIZER_ONSTACK(hdl.comp), \ + .status = (u32 *)&resp.status, \ }; \ struct xdna_mailbox_msg msg = { \ .send_data = (u8 *)&req, \ diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 569cd703729d..1973ab67721b 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -13,13 +13,11 @@ #include <drm/gpu_scheduler.h> #include <linux/iommu.h> #include <linux/pci.h> -#include <linux/pm_runtime.h> #include "amdxdna_ctx.h" #include "amdxdna_gem.h" #include "amdxdna_pci_drv.h" - -#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */ +#include "amdxdna_pm.h" MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin"); MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin"); @@ -29,9 +27,14 @@ MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin"); /* * 0.0: Initial version * 0.1: Support getting all hardware contexts by DRM_IOCTL_AMDXDNA_GET_ARRAY + * 0.2: Support getting last error hardware error + * 0.3: Support firmware debug buffer + * 0.4: Support getting resource information + * 0.5: Support getting telemetry data + * 0.6: Support preemption */ #define AMDXDNA_DRIVER_MAJOR 0 -#define AMDXDNA_DRIVER_MINOR 1 +#define AMDXDNA_DRIVER_MINOR 6 /* * Bind the driver base on (vendor_id, device_id) pair and later use the @@ -61,17 +64,9 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp) struct amdxdna_client *client; int ret; - ret = pm_runtime_resume_and_get(ddev->dev); - if (ret) { - XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret); - return ret; - } - client = kzalloc(sizeof(*client), GFP_KERNEL); - if (!client) { - ret = -ENOMEM; - goto put_rpm; - } + if (!client) + return -ENOMEM; client->pid = pid_nr(rcu_access_pointer(filp->pid)); client->xdna = xdna; @@ -106,9 +101,6 @@ unbind_sva: iommu_sva_unbind_device(client->sva); failed: kfree(client); -put_rpm: - pm_runtime_mark_last_busy(ddev->dev); - pm_runtime_put_autosuspend(ddev->dev); return ret; } @@ -130,8 +122,6 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp) XDNA_DBG(xdna, "pid %d closed", client->pid); kfree(client); - pm_runtime_mark_last_busy(ddev->dev); - pm_runtime_put_autosuspend(ddev->dev); } static int amdxdna_flush(struct file *f, fl_owner_t id) @@ -310,19 +300,12 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto failed_dev_fini; } - pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY); - pm_runtime_use_autosuspend(dev); - pm_runtime_allow(dev); - ret = drm_dev_register(&xdna->ddev, 0); if (ret) { XDNA_ERR(xdna, "DRM register failed, ret %d", ret); - pm_runtime_forbid(dev); goto failed_sysfs_fini; } - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); return 0; failed_sysfs_fini: @@ -339,14 +322,10 @@ destroy_notifier_wq: static void amdxdna_remove(struct pci_dev *pdev) { struct amdxdna_dev *xdna = pci_get_drvdata(pdev); - struct device *dev = &pdev->dev; struct amdxdna_client *client; destroy_workqueue(xdna->notifier_wq); - pm_runtime_get_noresume(dev); - pm_runtime_forbid(dev); - drm_dev_unplug(&xdna->ddev); amdxdna_sysfs_fini(xdna); @@ -365,29 +344,9 @@ static void amdxdna_remove(struct pci_dev *pdev) mutex_unlock(&xdna->dev_lock); } -static int amdxdna_pmops_suspend(struct device *dev) -{ - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev)); - - if (!xdna->dev_info->ops->suspend) - return -EOPNOTSUPP; - - return xdna->dev_info->ops->suspend(xdna); -} - -static int amdxdna_pmops_resume(struct device *dev) -{ - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev)); - - if (!xdna->dev_info->ops->resume) - return -EOPNOTSUPP; - - return xdna->dev_info->ops->resume(xdna); -} - static const struct dev_pm_ops amdxdna_pm_ops = { - SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume) - RUNTIME_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume, NULL) + SYSTEM_SLEEP_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume) + RUNTIME_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume, NULL) }; static struct pci_driver amdxdna_pci_driver = { diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h index 72d6696d49da..c99477f5e454 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h @@ -6,6 +6,7 @@ #ifndef _AMDXDNA_PCI_DRV_H_ #define _AMDXDNA_PCI_DRV_H_ +#include <drm/drm_print.h> #include <linux/workqueue.h> #include <linux/xarray.h> @@ -54,6 +55,7 @@ struct amdxdna_dev_ops { int (*hwctx_init)(struct amdxdna_hwctx *hwctx); void (*hwctx_fini)(struct amdxdna_hwctx *hwctx); int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); + int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl); void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq); int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args); @@ -99,6 +101,7 @@ struct amdxdna_dev { struct amdxdna_fw_ver fw_ver; struct rw_semaphore notifier_lock; /* for mmu notifier*/ struct workqueue_struct *notifier_wq; + bool rpm_on; }; /* diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/amdxdna/amdxdna_pm.c new file mode 100644 index 000000000000..fa38e65d617c --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_pm.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_drv.h> +#include <linux/pm_runtime.h> + +#include "amdxdna_pm.h" + +#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */ + +int amdxdna_pm_suspend(struct device *dev) +{ + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); + int ret = -EOPNOTSUPP; + bool rpm; + + if (xdna->dev_info->ops->suspend) { + rpm = xdna->rpm_on; + xdna->rpm_on = false; + ret = xdna->dev_info->ops->suspend(xdna); + xdna->rpm_on = rpm; + } + + XDNA_DBG(xdna, "Suspend done ret %d", ret); + return ret; +} + +int amdxdna_pm_resume(struct device *dev) +{ + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); + int ret = -EOPNOTSUPP; + bool rpm; + + if (xdna->dev_info->ops->resume) { + rpm = xdna->rpm_on; + xdna->rpm_on = false; + ret = xdna->dev_info->ops->resume(xdna); + xdna->rpm_on = rpm; + } + + XDNA_DBG(xdna, "Resume done ret %d", ret); + return ret; +} + +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna) +{ + struct device *dev = xdna->ddev.dev; + int ret; + + if (!xdna->rpm_on) + return 0; + + ret = pm_runtime_resume_and_get(dev); + if (ret) { + XDNA_ERR(xdna, "Resume failed: %d", ret); + pm_runtime_set_suspended(dev); + } + + return ret; +} + +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna) +{ + struct device *dev = xdna->ddev.dev; + + if (!xdna->rpm_on) + return; + + pm_runtime_put_autosuspend(dev); +} + +void amdxdna_pm_init(struct amdxdna_dev *xdna) +{ + struct device *dev = xdna->ddev.dev; + + pm_runtime_set_active(dev); + pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY); + pm_runtime_use_autosuspend(dev); + pm_runtime_allow(dev); + pm_runtime_put_autosuspend(dev); + xdna->rpm_on = true; +} + +void amdxdna_pm_fini(struct amdxdna_dev *xdna) +{ + struct device *dev = xdna->ddev.dev; + + xdna->rpm_on = false; + pm_runtime_get_noresume(dev); + pm_runtime_forbid(dev); +} diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/amdxdna/amdxdna_pm.h new file mode 100644 index 000000000000..77b2d6e45570 --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_pm.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025, Advanced Micro Devices, Inc. + */ + +#ifndef _AMDXDNA_PM_H_ +#define _AMDXDNA_PM_H_ + +#include "amdxdna_pci_drv.h" + +int amdxdna_pm_suspend(struct device *dev); +int amdxdna_pm_resume(struct device *dev); +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna); +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna); +void amdxdna_pm_init(struct amdxdna_dev *xdna); +void amdxdna_pm_fini(struct amdxdna_dev *xdna); + +#endif /* _AMDXDNA_PM_H_ */ diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c index e4f6dac7d00f..ec407f3b48fc 100644 --- a/drivers/accel/amdxdna/npu1_regs.c +++ b/drivers/accel/amdxdna/npu1_regs.c @@ -46,6 +46,7 @@ const struct rt_config npu1_default_rt_cfg[] = { { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */ + { 4, 1, AIE2_RT_CFG_INIT }, /* Debug BO */ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ { 0 }, }; @@ -62,16 +63,23 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = { { 0 } }; +static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = { + { .feature = AIE2_NPU_COMMAND, .min_minor = 8 }, + { 0 } +}; + static const struct amdxdna_dev_priv npu1_dev_priv = { .fw_path = "amdnpu/1502_00/npu.sbin", .protocol_major = 0x5, .protocol_minor = 0x7, .rt_config = npu1_default_rt_cfg, .dpm_clk_tbl = npu1_dpm_clk_table, + .fw_feature_tbl = npu1_fw_feature_table, .col_align = COL_ALIGN_NONE, .mbox_dev_addr = NPU1_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ .sram_dev_addr = NPU1_SRAM_BAR_BASE, + .hwctx_limit = 6, .sram_offs = { DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU1_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU1_SRAM, MPNPU_SRAM_I2X_MAILBOX_15), diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c index a081cac75ee0..86f87d0d1354 100644 --- a/drivers/accel/amdxdna/npu2_regs.c +++ b/drivers/accel/amdxdna/npu2_regs.c @@ -67,10 +67,12 @@ static const struct amdxdna_dev_priv npu2_dev_priv = { .protocol_minor = 0x6, .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, + .fw_feature_tbl = npu4_fw_feature_table, .col_align = COL_ALIGN_NATURE, .mbox_dev_addr = NPU2_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ .sram_dev_addr = NPU2_SRAM_BAR_BASE, + .hwctx_limit = 16, .sram_offs = { DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c index 9f2e33182ec6..986a5f28ba24 100644 --- a/drivers/accel/amdxdna/npu4_regs.c +++ b/drivers/accel/amdxdna/npu4_regs.c @@ -63,10 +63,14 @@ const struct rt_config npu4_default_rt_cfg[] = { { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */ + { 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */ + { 14, 0, AIE2_RT_CFG_INIT, BIT_U64(AIE2_PREEMPT) }, /* Frame boundary preemption */ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 13, 0, AIE2_RT_CFG_FORCE_PREEMPT }, + { 14, 0, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT }, { 0 }, }; @@ -82,16 +86,24 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = { { 0 } }; +const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = { + { .feature = AIE2_NPU_COMMAND, .min_minor = 15 }, + { .feature = AIE2_PREEMPT, .min_minor = 12 }, + { 0 } +}; + static const struct amdxdna_dev_priv npu4_dev_priv = { .fw_path = "amdnpu/17f0_10/npu.sbin", .protocol_major = 0x6, .protocol_minor = 12, .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, + .fw_feature_tbl = npu4_fw_feature_table, .col_align = COL_ALIGN_NATURE, .mbox_dev_addr = NPU4_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ .sram_dev_addr = NPU4_SRAM_BAR_BASE, + .hwctx_limit = 16, .sram_offs = { DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c index 5f1cf83461c4..75ad97f0b937 100644 --- a/drivers/accel/amdxdna/npu5_regs.c +++ b/drivers/accel/amdxdna/npu5_regs.c @@ -67,10 +67,12 @@ static const struct amdxdna_dev_priv npu5_dev_priv = { .protocol_minor = 12, .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, + .fw_feature_tbl = npu4_fw_feature_table, .col_align = COL_ALIGN_NATURE, .mbox_dev_addr = NPU5_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ .sram_dev_addr = NPU5_SRAM_BAR_BASE, + .hwctx_limit = 16, .sram_offs = { DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c index 94a7005685a7..758dc013fe13 100644 --- a/drivers/accel/amdxdna/npu6_regs.c +++ b/drivers/accel/amdxdna/npu6_regs.c @@ -67,10 +67,12 @@ static const struct amdxdna_dev_priv npu6_dev_priv = { .protocol_minor = 12, .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, + .fw_feature_tbl = npu4_fw_feature_table, .col_align = COL_ALIGN_NATURE, .mbox_dev_addr = NPU6_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ .sram_dev_addr = NPU6_SRAM_BAR_BASE, + .hwctx_limit = 16, .sram_offs = { DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), |
