summaryrefslogtreecommitdiff
path: root/drivers/accel
diff options
context:
space:
mode:
authorSimona Vetter <simona.vetter@ffwll.ch>2025-10-24 13:25:19 +0200
committerSimona Vetter <simona.vetter@ffwll.ch>2025-10-24 13:25:20 +0200
commit098456f3141bf9e0c0d8973695ca38a03465ccd6 (patch)
tree07d1d0013bd58f689f0687b0c64b0cfcc0fad5c1 /drivers/accel
parent6200442de089468ff283becb81382d6ac23c25e9 (diff)
parent7ea0468380216c10b73633b976d33efa8c12d375 (diff)
Merge tag 'drm-misc-next-2025-10-21' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-next
drm-misc-next for v6.19: UAPI Changes: amdxdna: - Support reading last hardware error Cross-subsystem Changes: dma-buf: - heaps: Create heap per CMA reserved location; Improve user-space documentation Core Changes: atomic: - Clean up and improve state-handling interfaces, update drivers bridge: - Improve ref counting buddy: - Optimize block management Driver Changes: amdxdna: - Fix runtime power management - Support firmware debug output ast: - Set quirks for each chip model atmel-hlcdc: - Set LCDC_ATTRE register in plane disable - Set correct values for plane scaler bochs: - Use vblank timer bridge: - synopsis: Support CEC; Init timer with correct frequency cirrus-qemu: - Use vblank timer imx: - Clean up ivu: - Update JSM API to 3.33.0 - Reset engine on more job errors - Return correct error codes for jobs komeda: - Use drm_ logging functions panel: - edp: Support AUO B116XAN02.0 panfrost: - Embed struct drm_driver in Panfrost device - Improve error handling - Clean up job handling panthor: - Support custom ASN_HASH for mt8196 renesas: - rz-du: Fix dependencies rockchip: - dsi: Add support for RK3368 - Fix LUT size for RK3386 sitronix: - Fix output position when clearing screens qaic: - Support dma-buf exports - Support new firmware's READ_DATA implementation - Replace kcalloc with memdup - Replace snprintf() with sysfs_emit() - Avoid overflows in arithmetics - Clean up - Fixes qxl: - Use vblank timer rockchip: - Clean up mode-setting code vgem: - Fix fence timer deadlock virtgpu: - Use vblank timer Signed-off-by: Simona Vetter <simona.vetter@ffwll.ch> From: Thomas Zimmermann <tzimmermann@suse.de> Link: https://lore.kernel.org/r/20251021111837.GA40643@linux.fritz.box
Diffstat (limited to 'drivers/accel')
-rw-r--r--drivers/accel/amdxdna/TODO1
-rw-r--r--drivers/accel/amdxdna/aie2_ctx.c125
-rw-r--r--drivers/accel/amdxdna/aie2_error.c95
-rw-r--r--drivers/accel/amdxdna/aie2_message.c31
-rw-r--r--drivers/accel/amdxdna/aie2_msg_priv.h18
-rw-r--r--drivers/accel/amdxdna/aie2_pci.c4
-rw-r--r--drivers/accel/amdxdna/aie2_pci.h8
-rw-r--r--drivers/accel/amdxdna/amdxdna_ctx.c39
-rw-r--r--drivers/accel/amdxdna/amdxdna_ctx.h16
-rw-r--r--drivers/accel/amdxdna/amdxdna_error.h59
-rw-r--r--drivers/accel/amdxdna/amdxdna_gem.c3
-rw-r--r--drivers/accel/amdxdna/amdxdna_gem.h6
-rw-r--r--drivers/accel/amdxdna/amdxdna_pci_drv.c4
-rw-r--r--drivers/accel/amdxdna/amdxdna_pci_drv.h1
-rw-r--r--drivers/accel/amdxdna/npu1_regs.c1
-rw-r--r--drivers/accel/amdxdna/npu4_regs.c1
-rw-r--r--drivers/accel/ivpu/ivpu_gem.c3
-rw-r--r--drivers/accel/ivpu/ivpu_job.c57
-rw-r--r--drivers/accel/ivpu/ivpu_job.h3
-rw-r--r--drivers/accel/ivpu/vpu_jsm_api.h150
-rw-r--r--drivers/accel/qaic/qaic_control.c9
-rw-r--r--drivers/accel/qaic/qaic_data.c96
-rw-r--r--drivers/accel/qaic/qaic_ras.c6
-rw-r--r--drivers/accel/qaic/sahara.c159
24 files changed, 704 insertions, 191 deletions
diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
index ad8ac6e315b6..0e4bbebeaedf 100644
--- a/drivers/accel/amdxdna/TODO
+++ b/drivers/accel/amdxdna/TODO
@@ -1,2 +1 @@
- Add debugfs support
-- Add debug BO support
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 691fdb3b008f..63450b7773ac 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -226,11 +226,10 @@ out:
}
static int
-aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size)
+aie2_sched_drvcmd_resp_handler(void *handle, void __iomem *data, size_t size)
{
struct amdxdna_sched_job *job = handle;
int ret = 0;
- u32 status;
if (unlikely(!data))
goto out;
@@ -240,8 +239,7 @@ aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size)
goto out;
}
- status = readl(data);
- XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+ job->drv_cmd->result = readl(data);
out:
aie2_sched_notify(job);
@@ -314,8 +312,18 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
kref_get(&job->refcnt);
fence = dma_fence_get(job->fence);
- if (unlikely(!cmd_abo)) {
- ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
+ if (job->drv_cmd) {
+ switch (job->drv_cmd->opcode) {
+ case SYNC_DEBUG_BO:
+ ret = aie2_sync_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
+ break;
+ case ATTACH_DEBUG_BO:
+ ret = aie2_config_debug_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
goto out;
}
@@ -610,10 +618,14 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
goto free_entity;
}
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto free_col_list;
+
ret = aie2_alloc_resource(hwctx);
if (ret) {
XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
- goto free_col_list;
+ goto suspend_put;
}
ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
@@ -628,6 +640,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
goto release_resource;
}
+ amdxdna_pm_suspend_put(xdna);
hwctx->status = HWCTX_STAT_INIT;
ndev = xdna->dev_handle;
@@ -640,6 +653,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
release_resource:
aie2_release_resource(hwctx);
+suspend_put:
+ amdxdna_pm_suspend_put(xdna);
free_col_list:
kfree(hwctx->col_list);
free_entity:
@@ -759,6 +774,74 @@ free_cus:
return ret;
}
+static void aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq)
+{
+ struct dma_fence *out_fence = aie2_cmd_get_out_fence(hwctx, seq);
+
+ if (!out_fence) {
+ XDNA_ERR(hwctx->client->xdna, "Failed to get fence");
+ return;
+ }
+
+ dma_fence_wait_timeout(out_fence, false, MAX_SCHEDULE_TIMEOUT);
+ dma_fence_put(out_fence);
+}
+
+static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl,
+ bool attach)
+{
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_drv_cmd cmd = { 0 };
+ struct amdxdna_gem_obj *abo;
+ u64 seq;
+ int ret;
+
+ abo = amdxdna_gem_get_obj(client, bo_hdl, AMDXDNA_BO_DEV);
+ if (!abo) {
+ XDNA_ERR(xdna, "Get bo %d failed", bo_hdl);
+ return -EINVAL;
+ }
+
+ if (attach) {
+ if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) {
+ ret = -EBUSY;
+ goto put_obj;
+ }
+ cmd.opcode = ATTACH_DEBUG_BO;
+ } else {
+ if (abo->assigned_hwctx != hwctx->id) {
+ ret = -EINVAL;
+ goto put_obj;
+ }
+ cmd.opcode = DETACH_DEBUG_BO;
+ }
+
+ ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
+ &bo_hdl, 1, hwctx->id, &seq);
+ if (ret) {
+ XDNA_ERR(xdna, "Submit command failed");
+ goto put_obj;
+ }
+
+ aie2_cmd_wait(hwctx, seq);
+ if (cmd.result) {
+ XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
+ goto put_obj;
+ }
+
+ if (attach)
+ abo->assigned_hwctx = hwctx->id;
+ else
+ abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE;
+
+ XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name);
+
+put_obj:
+ amdxdna_gem_put_obj(abo);
+ return ret;
+}
+
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
{
struct amdxdna_dev *xdna = hwctx->client->xdna;
@@ -768,14 +851,40 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu
case DRM_AMDXDNA_HWCTX_CONFIG_CU:
return aie2_hwctx_cu_config(hwctx, buf, size);
case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
+ return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, true);
case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
- return -EOPNOTSUPP;
+ return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, false);
default:
XDNA_DBG(xdna, "Not supported type %d", type);
return -EOPNOTSUPP;
}
}
+int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl)
+{
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_drv_cmd cmd = { 0 };
+ u64 seq;
+ int ret;
+
+ cmd.opcode = SYNC_DEBUG_BO;
+ ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
+ &debug_bo_hdl, 1, hwctx->id, &seq);
+ if (ret) {
+ XDNA_ERR(xdna, "Submit command failed");
+ return ret;
+ }
+
+ aie2_cmd_wait(hwctx, seq);
+ if (cmd.result) {
+ XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
+ return ret;
+ }
+
+ return 0;
+}
+
static int aie2_populate_range(struct amdxdna_gem_obj *abo)
{
struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c
index 5ee905632a39..d452008ec4f4 100644
--- a/drivers/accel/amdxdna/aie2_error.c
+++ b/drivers/accel/amdxdna/aie2_error.c
@@ -13,6 +13,7 @@
#include "aie2_msg_priv.h"
#include "aie2_pci.h"
+#include "amdxdna_error.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"
@@ -46,6 +47,7 @@ enum aie_module_type {
AIE_MEM_MOD = 0,
AIE_CORE_MOD,
AIE_PL_MOD,
+ AIE_UNKNOWN_MOD,
};
enum aie_error_category {
@@ -143,6 +145,31 @@ static const struct aie_event_category aie_ml_shim_tile_event_cat[] = {
EVENT_CATEGORY(74U, AIE_ERROR_LOCK),
};
+static const enum amdxdna_error_num aie_cat_err_num_map[] = {
+ [AIE_ERROR_SATURATION] = AMDXDNA_ERROR_NUM_AIE_SATURATION,
+ [AIE_ERROR_FP] = AMDXDNA_ERROR_NUM_AIE_FP,
+ [AIE_ERROR_STREAM] = AMDXDNA_ERROR_NUM_AIE_STREAM,
+ [AIE_ERROR_ACCESS] = AMDXDNA_ERROR_NUM_AIE_ACCESS,
+ [AIE_ERROR_BUS] = AMDXDNA_ERROR_NUM_AIE_BUS,
+ [AIE_ERROR_INSTRUCTION] = AMDXDNA_ERROR_NUM_AIE_INSTRUCTION,
+ [AIE_ERROR_ECC] = AMDXDNA_ERROR_NUM_AIE_ECC,
+ [AIE_ERROR_LOCK] = AMDXDNA_ERROR_NUM_AIE_LOCK,
+ [AIE_ERROR_DMA] = AMDXDNA_ERROR_NUM_AIE_DMA,
+ [AIE_ERROR_MEM_PARITY] = AMDXDNA_ERROR_NUM_AIE_MEM_PARITY,
+ [AIE_ERROR_UNKNOWN] = AMDXDNA_ERROR_NUM_UNKNOWN,
+};
+
+static_assert(ARRAY_SIZE(aie_cat_err_num_map) == AIE_ERROR_UNKNOWN + 1);
+
+static const enum amdxdna_error_module aie_err_mod_map[] = {
+ [AIE_MEM_MOD] = AMDXDNA_ERROR_MODULE_AIE_MEMORY,
+ [AIE_CORE_MOD] = AMDXDNA_ERROR_MODULE_AIE_CORE,
+ [AIE_PL_MOD] = AMDXDNA_ERROR_MODULE_AIE_PL,
+ [AIE_UNKNOWN_MOD] = AMDXDNA_ERROR_MODULE_UNKNOWN,
+};
+
+static_assert(ARRAY_SIZE(aie_err_mod_map) == AIE_UNKNOWN_MOD + 1);
+
static enum aie_error_category
aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type)
{
@@ -176,12 +203,40 @@ aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type)
if (event_id != lut[i].event_id)
continue;
+ if (lut[i].category > AIE_ERROR_UNKNOWN)
+ return AIE_ERROR_UNKNOWN;
+
return lut[i].category;
}
return AIE_ERROR_UNKNOWN;
}
+static void aie2_update_last_async_error(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err)
+{
+ struct aie_error *errs = err_info;
+ enum amdxdna_error_module err_mod;
+ enum aie_error_category aie_err;
+ enum amdxdna_error_num err_num;
+ struct aie_error *last_err;
+
+ last_err = &errs[num_err - 1];
+ if (last_err->mod_type >= AIE_UNKNOWN_MOD) {
+ err_num = aie_cat_err_num_map[AIE_ERROR_UNKNOWN];
+ err_mod = aie_err_mod_map[AIE_UNKNOWN_MOD];
+ } else {
+ aie_err = aie_get_error_category(last_err->row,
+ last_err->event_id,
+ last_err->mod_type);
+ err_num = aie_cat_err_num_map[aie_err];
+ err_mod = aie_err_mod_map[last_err->mod_type];
+ }
+
+ ndev->last_async_err.err_code = AMDXDNA_ERROR_ENCODE(err_num, err_mod);
+ ndev->last_async_err.ts_us = ktime_to_us(ktime_get_real());
+ ndev->last_async_err.ex_err_code = AMDXDNA_EXTRA_ERR_ENCODE(last_err->row, last_err->col);
+}
+
static u32 aie2_error_backtrack(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err)
{
struct aie_error *errs = err_info;
@@ -264,29 +319,14 @@ static void aie2_error_worker(struct work_struct *err_work)
}
mutex_lock(&xdna->dev_lock);
+ aie2_update_last_async_error(e->ndev, info->payload, info->err_cnt);
+
/* Re-sent this event to firmware */
if (aie2_error_event_send(e))
XDNA_WARN(xdna, "Unable to register async event");
mutex_unlock(&xdna->dev_lock);
}
-int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev)
-{
- struct amdxdna_dev *xdna = ndev->xdna;
- struct async_event *e;
- int i, ret;
-
- drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
- for (i = 0; i < ndev->async_events->event_cnt; i++) {
- e = &ndev->async_events->event[i];
- ret = aie2_error_event_send(e);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev)
{
struct amdxdna_dev *xdna = ndev->xdna;
@@ -341,6 +381,10 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
e->size = ASYNC_BUF_SIZE;
e->resp.status = MAX_AIE2_STATUS_CODE;
INIT_WORK(&e->work, aie2_error_worker);
+
+ ret = aie2_error_event_send(e);
+ if (ret)
+ goto free_wq;
}
ndev->async_events = events;
@@ -349,6 +393,8 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
events->event_cnt, events->size);
return 0;
+free_wq:
+ destroy_workqueue(events->wq);
free_buf:
dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf,
events->addr, DMA_FROM_DEVICE);
@@ -356,3 +402,18 @@ free_events:
kfree(events);
return ret;
}
+
+int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, struct amdxdna_drm_get_array *args)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ args->num_element = 1;
+ args->element_size = sizeof(ndev->last_async_err);
+ if (copy_to_user(u64_to_user_ptr(args->buffer),
+ &ndev->last_async_err, args->element_size))
+ return -EFAULT;
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 4660e8297ed8..0ec1dc6fe668 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -749,7 +749,7 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
int ret = 0;
req.src_addr = 0;
- req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr;
+ req.dst_addr = amdxdna_dev_bo_offset(abo);
req.size = abo->mem.size;
/* Device to Host */
@@ -773,3 +773,32 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
return 0;
}
+
+int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
+{
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct config_debug_bo_req req;
+ struct xdna_mailbox_msg msg;
+
+ if (job->drv_cmd->opcode == ATTACH_DEBUG_BO)
+ req.config = DEBUG_BO_REGISTER;
+ else
+ req.config = DEBUG_BO_UNREGISTER;
+
+ req.offset = amdxdna_dev_bo_offset(abo);
+ req.size = abo->mem.size;
+
+ XDNA_DBG(xdna, "offset 0x%llx size 0x%llx config %d",
+ req.offset, req.size, req.config);
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ msg.opcode = MSG_OP_CONFIG_DEBUG_BO;
+
+ return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+}
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index 6df9065b13f6..cb53132029eb 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -18,6 +18,7 @@ enum aie2_msg_opcode {
MSG_OP_CONFIG_CU = 0x11,
MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12,
MSG_OP_CHAIN_EXEC_DPU = 0x13,
+ MSG_OP_CONFIG_DEBUG_BO = 0x14,
MSG_OP_MAX_XRT_OPCODE,
MSG_OP_SUSPEND = 0x101,
MSG_OP_RESUME = 0x102,
@@ -365,4 +366,21 @@ struct sync_bo_req {
struct sync_bo_resp {
enum aie2_msg_status status;
} __packed;
+
+#define DEBUG_BO_UNREGISTER 0
+#define DEBUG_BO_REGISTER 1
+struct config_debug_bo_req {
+ __u64 offset;
+ __u64 size;
+ /*
+ * config operations.
+ * DEBUG_BO_REGISTER: Register debug buffer
+ * DEBUG_BO_UNREGISTER: Unregister debug buffer
+ */
+ __u32 config;
+} __packed;
+
+struct config_debug_bo_resp {
+ enum aie2_msg_status status;
+} __packed;
#endif /* _AIE2_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 8a66f276100e..f48045318dc0 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -924,6 +924,9 @@ static int aie2_get_array(struct amdxdna_client *client,
case DRM_AMDXDNA_HW_CONTEXT_ALL:
ret = aie2_query_ctx_status_array(client, args);
break;
+ case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
+ ret = aie2_get_array_async_error(xdna->dev_handle, args);
+ break;
default:
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
@@ -1001,6 +1004,7 @@ const struct amdxdna_dev_ops aie2_ops = {
.hwctx_init = aie2_hwctx_init,
.hwctx_fini = aie2_hwctx_fini,
.hwctx_config = aie2_hwctx_config,
+ .hwctx_sync_debug_bo = aie2_hwctx_sync_debug_bo,
.cmd_submit = aie2_cmd_submit,
.hmm_invalidate = aie2_hmm_invalidate,
.get_array = aie2_get_array,
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 289a23ecd5f1..243ac21d50c1 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -190,6 +190,8 @@ struct amdxdna_dev_hdl {
enum aie2_dev_status dev_status;
u32 hwctx_num;
+
+ struct amdxdna_async_error last_async_err;
};
#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
@@ -253,8 +255,9 @@ void aie2_psp_stop(struct psp_device *psp);
/* aie2_error.c */
int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
-int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev);
int aie2_error_async_msg_thread(void *data);
+int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
+ struct amdxdna_drm_get_array *args);
/* aie2_message.c */
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
@@ -284,11 +287,14 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t));
/* aie2_hwctx.c */
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
void aie2_hwctx_suspend(struct amdxdna_client *client);
int aie2_hwctx_resume(struct amdxdna_client *client);
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index 868ca369e0a0..d18182c59668 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -328,6 +328,38 @@ unlock_srcu:
return ret;
}
+int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_hwctx *hwctx;
+ struct amdxdna_gem_obj *abo;
+ struct drm_gem_object *gobj;
+ int ret, idx;
+
+ if (!xdna->dev_info->ops->hwctx_sync_debug_bo)
+ return -EOPNOTSUPP;
+
+ gobj = drm_gem_object_lookup(client->filp, debug_bo_hdl);
+ if (!gobj)
+ return -EINVAL;
+
+ abo = to_xdna_obj(gobj);
+ guard(mutex)(&xdna->dev_lock);
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ hwctx = xa_load(&client->hwctx_xa, abo->assigned_hwctx);
+ if (!hwctx) {
+ ret = -EINVAL;
+ goto unlock_srcu;
+ }
+
+ ret = xdna->dev_info->ops->hwctx_sync_debug_bo(hwctx, debug_bo_hdl);
+
+unlock_srcu:
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ drm_gem_object_put(gobj);
+ return ret;
+}
+
static void
amdxdna_arg_bos_put(struct amdxdna_sched_job *job)
{
@@ -393,6 +425,7 @@ void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job)
}
int amdxdna_cmd_submit(struct amdxdna_client *client,
+ struct amdxdna_drv_cmd *drv_cmd,
u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt,
u32 hwctx_hdl, u64 *seq)
{
@@ -406,6 +439,8 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
if (!job)
return -ENOMEM;
+ job->drv_cmd = drv_cmd;
+
if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) {
job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD);
if (!job->cmd_bo) {
@@ -413,8 +448,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
ret = -EINVAL;
goto free_job;
}
- } else {
- job->cmd_bo = NULL;
}
ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt);
@@ -508,7 +541,7 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
}
}
- ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls,
+ ret = amdxdna_cmd_submit(client, NULL, cmd_bo_hdl, arg_bo_hdls,
args->arg_count, args->hwctx, &args->seq);
if (ret)
XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index 7cd7a55936f0..cbe60efbe60b 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -95,6 +95,17 @@ struct amdxdna_hwctx {
#define drm_job_to_xdna_job(j) \
container_of(j, struct amdxdna_sched_job, base)
+enum amdxdna_job_opcode {
+ SYNC_DEBUG_BO,
+ ATTACH_DEBUG_BO,
+ DETACH_DEBUG_BO,
+};
+
+struct amdxdna_drv_cmd {
+ enum amdxdna_job_opcode opcode;
+ u32 result;
+};
+
struct amdxdna_sched_job {
struct drm_sched_job base;
struct kref refcnt;
@@ -106,6 +117,7 @@ struct amdxdna_sched_job {
struct dma_fence *out_fence;
bool job_done;
u64 seq;
+ struct amdxdna_drv_cmd *drv_cmd;
struct amdxdna_gem_obj *cmd_bo;
size_t bo_cnt;
struct drm_gem_object *bos[] __counted_by(bo_cnt);
@@ -143,9 +155,11 @@ void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
int amdxdna_hwctx_walk(struct amdxdna_client *client, void *arg,
int (*walk)(struct amdxdna_hwctx *hwctx, void *arg));
+int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl);
int amdxdna_cmd_submit(struct amdxdna_client *client,
- u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt,
+ struct amdxdna_drv_cmd *drv_cmd, u32 cmd_bo_hdls,
+ u32 *arg_bo_hdls, u32 arg_bo_cnt,
u32 hwctx_hdl, u64 *seq);
int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
diff --git a/drivers/accel/amdxdna/amdxdna_error.h b/drivers/accel/amdxdna/amdxdna_error.h
new file mode 100644
index 000000000000..c51de86ec12b
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_error.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_ERROR_H_
+#define _AMDXDNA_ERROR_H_
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+
+#define AMDXDNA_ERR_DRV_AIE 4
+#define AMDXDNA_ERR_SEV_CRITICAL 3
+#define AMDXDNA_ERR_CLASS_AIE 2
+
+#define AMDXDNA_ERR_NUM_MASK GENMASK_U64(15, 0)
+#define AMDXDNA_ERR_DRV_MASK GENMASK_U64(23, 16)
+#define AMDXDNA_ERR_SEV_MASK GENMASK_U64(31, 24)
+#define AMDXDNA_ERR_MOD_MASK GENMASK_U64(39, 32)
+#define AMDXDNA_ERR_CLASS_MASK GENMASK_U64(47, 40)
+
+enum amdxdna_error_num {
+ AMDXDNA_ERROR_NUM_AIE_SATURATION = 3,
+ AMDXDNA_ERROR_NUM_AIE_FP,
+ AMDXDNA_ERROR_NUM_AIE_STREAM,
+ AMDXDNA_ERROR_NUM_AIE_ACCESS,
+ AMDXDNA_ERROR_NUM_AIE_BUS,
+ AMDXDNA_ERROR_NUM_AIE_INSTRUCTION,
+ AMDXDNA_ERROR_NUM_AIE_ECC,
+ AMDXDNA_ERROR_NUM_AIE_LOCK,
+ AMDXDNA_ERROR_NUM_AIE_DMA,
+ AMDXDNA_ERROR_NUM_AIE_MEM_PARITY,
+ AMDXDNA_ERROR_NUM_UNKNOWN = 15,
+};
+
+enum amdxdna_error_module {
+ AMDXDNA_ERROR_MODULE_AIE_CORE = 3,
+ AMDXDNA_ERROR_MODULE_AIE_MEMORY,
+ AMDXDNA_ERROR_MODULE_AIE_SHIM,
+ AMDXDNA_ERROR_MODULE_AIE_NOC,
+ AMDXDNA_ERROR_MODULE_AIE_PL,
+ AMDXDNA_ERROR_MODULE_UNKNOWN = 8,
+};
+
+#define AMDXDNA_ERROR_ENCODE(err_num, err_mod) \
+ (FIELD_PREP(AMDXDNA_ERR_NUM_MASK, err_num) | \
+ FIELD_PREP_CONST(AMDXDNA_ERR_DRV_MASK, AMDXDNA_ERR_DRV_AIE) | \
+ FIELD_PREP_CONST(AMDXDNA_ERR_SEV_MASK, AMDXDNA_ERR_SEV_CRITICAL) | \
+ FIELD_PREP(AMDXDNA_ERR_MOD_MASK, err_mod) | \
+ FIELD_PREP_CONST(AMDXDNA_ERR_CLASS_MASK, AMDXDNA_ERR_CLASS_AIE))
+
+#define AMDXDNA_EXTRA_ERR_COL_MASK GENMASK_U64(7, 0)
+#define AMDXDNA_EXTRA_ERR_ROW_MASK GENMASK_U64(15, 8)
+
+#define AMDXDNA_EXTRA_ERR_ENCODE(row, col) \
+ (FIELD_PREP(AMDXDNA_EXTRA_ERR_COL_MASK, col) | \
+ FIELD_PREP(AMDXDNA_EXTRA_ERR_ROW_MASK, row))
+
+#endif /* _AMDXDNA_ERROR_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 7f91863c3f24..61e0136c21a8 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -962,6 +962,9 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
XDNA_DBG(xdna, "Sync bo %d offset 0x%llx, size 0x%llx\n",
args->handle, args->offset, args->size);
+ if (args->direction == SYNC_DIRECT_FROM_DEVICE)
+ ret = amdxdna_hwctx_sync_debug_bo(abo->client, args->handle);
+
put_obj:
drm_gem_object_put(gobj);
return ret;
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
index ae29db94a9d3..f79fc7f3c93b 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.h
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -7,6 +7,7 @@
#define _AMDXDNA_GEM_H_
#include <linux/hmm.h>
+#include "amdxdna_pci_drv.h"
struct amdxdna_umap {
struct vm_area_struct *vma;
@@ -62,6 +63,11 @@ static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo)
drm_gem_object_put(to_gobj(abo));
}
+static inline u64 amdxdna_dev_bo_offset(struct amdxdna_gem_obj *abo)
+{
+ return abo->mem.dev_addr - abo->client->dev_heap->mem.dev_addr;
+}
+
void amdxdna_umap_put(struct amdxdna_umap *mapp);
struct drm_gem_object *
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index aa04452310e5..3599e713bfcb 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -27,9 +27,11 @@ MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
/*
* 0.0: Initial version
* 0.1: Support getting all hardware contexts by DRM_IOCTL_AMDXDNA_GET_ARRAY
+ * 0.2: Support getting last error hardware error
+ * 0.3: Support firmware debug buffer
*/
#define AMDXDNA_DRIVER_MAJOR 0
-#define AMDXDNA_DRIVER_MINOR 1
+#define AMDXDNA_DRIVER_MINOR 3
/*
* Bind the driver base on (vendor_id, device_id) pair and later use the
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index 626beebf730e..c99477f5e454 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -55,6 +55,7 @@ struct amdxdna_dev_ops {
int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+ int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index e4f6dac7d00f..10124cccb102 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -46,6 +46,7 @@
const struct rt_config npu1_default_rt_cfg[] = {
{ 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 4, 1, AIE2_RT_CFG_INIT }, /* Debug BO */
{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 0 },
};
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index 9f2e33182ec6..e1da882420ec 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -63,6 +63,7 @@
const struct rt_config npu4_default_rt_cfg[] = {
{ 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */
{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index e9830ad48d4b..e7277e02840a 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -46,12 +46,13 @@ static inline void ivpu_bo_unlock(struct ivpu_bo *bo)
static struct sg_table *ivpu_bo_map_attachment(struct ivpu_device *vdev, struct ivpu_bo *bo)
{
- struct sg_table *sgt = bo->base.sgt;
+ struct sg_table *sgt;
drm_WARN_ON(&vdev->drm, !bo->base.base.import_attach);
ivpu_bo_lock(bo);
+ sgt = bo->base.sgt;
if (!sgt) {
sgt = dma_buf_map_attachment(bo->base.base.import_attach, DMA_BIDIRECTIONAL);
if (IS_ERR(sgt))
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 17273c68f84c..ba4535a75aa7 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -564,21 +564,26 @@ static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *
return job;
}
-static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
+bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status)
{
- struct ivpu_job *job;
-
lockdep_assert_held(&vdev->submitted_jobs_lock);
- job = xa_load(&vdev->submitted_jobs_xa, job_id);
- if (!job)
- return -ENOENT;
+ switch (job_status) {
+ case VPU_JSM_STATUS_PROCESSING_ERR:
+ case VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MIN ... VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MAX:
+ {
+ struct ivpu_job *job = xa_load(&vdev->submitted_jobs_xa, job_id);
- if (job_status == VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW) {
+ if (!job)
+ return false;
+
+ /* Trigger an engine reset */
guard(mutex)(&job->file_priv->lock);
+ job->job_status = job_status;
+
if (job->file_priv->has_mmu_faults)
- return 0;
+ return false;
/*
* Mark context as faulty and defer destruction of the job to jobs abort thread
@@ -587,22 +592,42 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
*/
job->file_priv->has_mmu_faults = true;
queue_work(system_wq, &vdev->context_abort_work);
- return 0;
+ return true;
}
+ default:
+ /* Complete job with error status, engine reset not required */
+ break;
+ }
+
+ return false;
+}
+
+static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
+{
+ struct ivpu_job *job;
- job = ivpu_job_remove_from_submitted_jobs(vdev, job_id);
+ lockdep_assert_held(&vdev->submitted_jobs_lock);
+
+ job = xa_load(&vdev->submitted_jobs_xa, job_id);
if (!job)
return -ENOENT;
- if (job->file_priv->has_mmu_faults)
- job_status = DRM_IVPU_JOB_STATUS_ABORTED;
+ ivpu_job_remove_from_submitted_jobs(vdev, job_id);
+
+ if (job->job_status == VPU_JSM_STATUS_SUCCESS) {
+ if (job->file_priv->has_mmu_faults)
+ job->job_status = DRM_IVPU_JOB_STATUS_ABORTED;
+ else
+ job->job_status = job_status;
+ }
- job->bos[CMD_BUF_IDX]->job_status = job_status;
+ job->bos[CMD_BUF_IDX]->job_status = job->job_status;
dma_fence_signal(job->done_fence);
trace_job("done", job);
ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n",
- job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status);
+ job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx,
+ job->job_status);
ivpu_job_destroy(job);
ivpu_stop_job_timeout_detection(vdev);
@@ -1022,7 +1047,9 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
mutex_lock(&vdev->submitted_jobs_lock);
- ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
+ if (!ivpu_job_handle_engine_error(vdev, payload->job_id, payload->job_status))
+ /* No engine error, complete the job normally */
+ ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
mutex_unlock(&vdev->submitted_jobs_lock);
}
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
index d2fc4c151614..3ab61e6a5616 100644
--- a/drivers/accel/ivpu/ivpu_job.h
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -51,6 +51,7 @@ struct ivpu_cmdq {
* @cmdq_id: Command queue ID used for submission
* @job_id: Unique job ID for tracking and status reporting
* @engine_idx: Engine index for job execution
+ * @job_status: Status reported by firmware for this job
* @primary_preempt_buf: Primary preemption buffer for job
* @secondary_preempt_buf: Secondary preemption buffer for job (optional)
* @bo_count: Number of buffer objects associated with this job
@@ -64,6 +65,7 @@ struct ivpu_job {
u32 cmdq_id;
u32 job_id;
u32 engine_idx;
+ u32 job_status;
struct ivpu_bo *primary_preempt_buf;
struct ivpu_bo *secondary_preempt_buf;
size_t bo_count;
@@ -83,6 +85,7 @@ void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id)
void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
void ivpu_job_done_consumer_fini(struct ivpu_device *vdev);
+bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status);
void ivpu_context_abort_work_fn(struct work_struct *work);
void ivpu_jobs_abort_all(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
index de1b37ea1251..bca6a44dc041 100644
--- a/drivers/accel/ivpu/vpu_jsm_api.h
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -23,12 +23,12 @@
/*
* Minor version changes when API backward compatibility is preserved.
*/
-#define VPU_JSM_API_VER_MINOR 32
+#define VPU_JSM_API_VER_MINOR 33
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
-#define VPU_JSM_API_VER_PATCH 5
+#define VPU_JSM_API_VER_PATCH 0
/*
* Index in the API version table
@@ -76,8 +76,11 @@
#define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU
/* Job status returned when the job was preempted mid-command */
#define VPU_JSM_STATUS_PREEMPTED_MID_COMMAND 0xDU
+/* Range of status codes that require engine reset */
+#define VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MIN 0xEU
#define VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW 0xEU
#define VPU_JSM_STATUS_MVNCI_PREEMPTION_TIMED_OUT 0xFU
+#define VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MAX 0x1FU
/*
* Host <-> VPU IPC channels.
@@ -404,8 +407,8 @@ struct vpu_hws_native_fence_log_header {
/** Index of the first free entry in buffer. */
u32 first_free_entry_idx;
/**
- * Incremented each time NPU wraps around
- * the buffer to write next entry.
+ * Incremented whenever the NPU wraps around the buffer and writes
+ * to the first entry again.
*/
u32 wraparound_count;
};
@@ -454,10 +457,17 @@ struct vpu_hws_native_fence_log_buffer {
* Host <-> VPU IPC messages types.
*/
enum vpu_ipc_msg_type {
+ /** Unsupported command */
VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF,
- /* IPC Host -> Device, Async commands */
+ /** IPC Host -> Device, base id for async commands */
VPU_JSM_MSG_ASYNC_CMD = 0x1100,
+ /**
+ * Reset engine. The NPU cancels all the jobs currently executing on the target
+ * engine making the engine become idle and then does a HW reset, before returning
+ * to the host.
+ * @see struct vpu_ipc_msg_payload_engine_reset
+ */
VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD,
/**
* Preempt engine. The NPU stops (preempts) all the jobs currently
@@ -467,6 +477,7 @@ enum vpu_ipc_msg_type {
* the target engine, but it stops processing them (until the queue doorbell
* is rung again); the host is responsible to reset the job queue, either
* after preemption or when resubmitting jobs to the queue.
+ * @see vpu_ipc_msg_payload_engine_preempt
*/
VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101,
/**
@@ -583,23 +594,32 @@ enum vpu_ipc_msg_type {
* @see vpu_ipc_msg_payload_hws_resume_engine
*/
VPU_JSM_MSG_HWS_ENGINE_RESUME = 0x111b,
- /* Control command: Enable survivability/DCT mode */
+ /**
+ * Control command: Enable survivability/DCT mode
+ * @see vpu_ipc_msg_payload_pwr_dct_control
+ */
VPU_JSM_MSG_DCT_ENABLE = 0x111c,
- /* Control command: Disable survivability/DCT mode */
+ /**
+ * Control command: Disable survivability/DCT mode
+ * This command has no payload
+ */
VPU_JSM_MSG_DCT_DISABLE = 0x111d,
/**
* Dump VPU state. To be used for debug purposes only.
- * NOTE: Please introduce new ASYNC commands before this one. *
+ * This command has no payload.
+ * NOTE: Please introduce new ASYNC commands before this one.
*/
VPU_JSM_MSG_STATE_DUMP = 0x11FF,
- /* IPC Host -> Device, General commands */
+ /** IPC Host -> Device, base id for general commands */
VPU_JSM_MSG_GENERAL_CMD = 0x1200,
+ /** Unsupported command */
VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED = VPU_JSM_MSG_GENERAL_CMD,
/**
* Control dyndbg behavior by executing a dyndbg command; equivalent to
* Linux command:
* @verbatim echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control @endverbatim
+ * @see vpu_ipc_msg_payload_dyndbg_control
*/
VPU_JSM_MSG_DYNDBG_CONTROL = 0x1201,
/**
@@ -607,7 +627,10 @@ enum vpu_ipc_msg_type {
*/
VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202,
- /* IPC Device -> Host, Job completion */
+ /**
+ * IPC Device -> Host, Job completion
+ * @see struct vpu_ipc_msg_payload_job_done
+ */
VPU_JSM_MSG_JOB_DONE = 0x2100,
/**
* IPC Device -> Host, Fence signalled
@@ -622,6 +645,10 @@ enum vpu_ipc_msg_type {
* @see vpu_ipc_msg_payload_engine_reset_done
*/
VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE,
+ /**
+ * Preempt complete message
+ * @see vpu_ipc_msg_payload_engine_preempt_done
+ */
VPU_JSM_MSG_ENGINE_PREEMPT_DONE = 0x2201,
VPU_JSM_MSG_REGISTER_DB_DONE = 0x2202,
VPU_JSM_MSG_UNREGISTER_DB_DONE = 0x2203,
@@ -729,13 +756,20 @@ enum vpu_ipc_msg_type {
* @see vpu_ipc_msg_payload_hws_resume_engine
*/
VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE = 0x221c,
- /* Response to control command: Enable survivability/DCT mode */
+ /**
+ * Response to control command: Enable survivability/DCT mode
+ * This command has no payload
+ */
VPU_JSM_MSG_DCT_ENABLE_DONE = 0x221d,
- /* Response to control command: Disable survivability/DCT mode */
+ /**
+ * Response to control command: Disable survivability/DCT mode
+ * This command has no payload
+ */
VPU_JSM_MSG_DCT_DISABLE_DONE = 0x221e,
/**
* Response to state dump control command.
- * NOTE: Please introduce new ASYNC responses before this one. *
+ * This command has no payload.
+ * NOTE: Please introduce new ASYNC responses before this one.
*/
VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF,
@@ -753,20 +787,25 @@ enum vpu_ipc_msg_type {
enum vpu_ipc_msg_status { VPU_JSM_MSG_FREE, VPU_JSM_MSG_ALLOCATED };
-/*
- * Host <-> LRT IPC message payload definitions
+/**
+ * Engine reset request payload
+ * @see VPU_JSM_MSG_ENGINE_RESET
*/
struct vpu_ipc_msg_payload_engine_reset {
- /* Engine to be reset. */
+ /** Engine to be reset. */
u32 engine_idx;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
};
+/**
+ * Engine preemption request struct
+ * @see VPU_JSM_MSG_ENGINE_PREEMPT
+ */
struct vpu_ipc_msg_payload_engine_preempt {
- /* Engine to be preempted. */
+ /** Engine to be preempted. */
u32 engine_idx;
- /* ID of the preemption request. */
+ /** ID of the preemption request. */
u32 preempt_id;
};
@@ -935,20 +974,24 @@ struct vpu_jsm_metric_streamer_update {
u64 next_buffer_size;
};
+/**
+ * Device -> host job completion message.
+ * @see VPU_JSM_MSG_JOB_DONE
+ */
struct vpu_ipc_msg_payload_job_done {
- /* Engine to which the job was submitted. */
+ /** Engine to which the job was submitted. */
u32 engine_idx;
- /* Index of the doorbell to which the job was submitted */
+ /** Index of the doorbell to which the job was submitted */
u32 db_idx;
- /* ID of the completed job */
+ /** ID of the completed job */
u32 job_id;
- /* Status of the completed job */
+ /** Status of the completed job */
u32 job_status;
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
};
@@ -997,10 +1040,14 @@ struct vpu_ipc_msg_payload_engine_reset_done {
impacted_contexts[VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS];
};
+/**
+ * Preemption response struct
+ * @see VPU_JSM_MSG_ENGINE_PREEMPT_DONE
+ */
struct vpu_ipc_msg_payload_engine_preempt_done {
- /* Engine preempted. */
+ /** Engine preempted. */
u32 engine_idx;
- /* ID of the preemption request. */
+ /** ID of the preemption request. */
u32 preempt_id;
};
@@ -1552,29 +1599,24 @@ struct vpu_jsm_metric_counter_descriptor {
};
/**
- * Payload for VPU_JSM_MSG_DYNDBG_CONTROL requests.
+ * Payload for @ref VPU_JSM_MSG_DYNDBG_CONTROL requests.
*
- * VPU_JSM_MSG_DYNDBG_CONTROL are used to control the VPU FW Dynamic Debug
- * feature, which allows developers to selectively enable / disable MVLOG_DEBUG
- * messages. This is equivalent to the Dynamic Debug functionality provided by
- * Linux
- * (https://www.kernel.org/doc/html/latest/admin-guide/dynamic-debug-howto.html)
- * The host can control Dynamic Debug behavior by sending dyndbg commands, which
- * have the same syntax as Linux
- * dyndbg commands.
+ * VPU_JSM_MSG_DYNDBG_CONTROL requests are used to control the VPU FW dynamic debug
+ * feature, which allows developers to selectively enable/disable code to obtain
+ * additional FW information. This is equivalent to the dynamic debug functionality
+ * provided by Linux. The host can control dynamic debug behavior by sending dyndbg
+ * commands, using the same syntax as for Linux dynamic debug commands.
*
- * NOTE: in order for MVLOG_DEBUG messages to be actually printed, the host
- * still has to set the logging level to MVLOG_DEBUG, using the
- * VPU_JSM_MSG_TRACE_SET_CONFIG command.
+ * @see https://www.kernel.org/doc/html/latest/admin-guide/dynamic-debug-howto.html.
*
- * The host can see the current dynamic debug configuration by executing a
- * special 'show' command. The dyndbg configuration will be printed to the
- * configured logging destination using MVLOG_INFO logging level.
+ * NOTE:
+ * As the dynamic debug feature uses MVLOG messages to provide information, the host
+ * must first set the logging level to MVLOG_DEBUG, using the @ref VPU_JSM_MSG_TRACE_SET_CONFIG
+ * command.
*/
struct vpu_ipc_msg_payload_dyndbg_control {
/**
- * Dyndbg command (same format as Linux dyndbg); must be a NULL-terminated
- * string.
+ * Dyndbg command to be executed.
*/
char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN];
};
@@ -1595,7 +1637,7 @@ struct vpu_ipc_msg_payload_pwr_d0i3_enter {
};
/**
- * Payload for VPU_JSM_MSG_DCT_ENABLE message.
+ * Payload for @ref VPU_JSM_MSG_DCT_ENABLE message.
*
* Default values for DCT active/inactive times are 5.3ms and 30ms respectively,
* corresponding to a 85% duty cycle. This payload allows the host to tune these
@@ -1652,28 +1694,28 @@ union vpu_ipc_msg_payload {
struct vpu_ipc_msg_payload_pwr_dct_control pwr_dct_control;
};
-/*
- * Host <-> LRT IPC message base structure.
+/**
+ * Host <-> NPU IPC message base structure.
*
* NOTE: All instances of this object must be aligned on a 64B boundary
* to allow proper handling of VPU cache operations.
*/
struct vpu_jsm_msg {
- /* Reserved */
+ /** Reserved */
u64 reserved_0;
- /* Message type, see vpu_ipc_msg_type enum. */
+ /** Message type, see @ref vpu_ipc_msg_type. */
u32 type;
- /* Buffer status, see vpu_ipc_msg_status enum. */
+ /** Buffer status, see @ref vpu_ipc_msg_status. */
u32 status;
- /*
+ /**
* Request ID, provided by the host in a request message and passed
* back by VPU in the response message.
*/
u32 request_id;
- /* Request return code set by the VPU, see VPU_JSM_STATUS_* defines. */
+ /** Request return code set by the VPU, see VPU_JSM_STATUS_* defines. */
u32 result;
u64 reserved_1;
- /* Message payload depending on message type, see vpu_ipc_msg_payload union. */
+ /** Message payload depending on message type, see vpu_ipc_msg_payload union. */
union vpu_ipc_msg_payload payload;
};
diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c
index b86a8e48e731..9af8333db513 100644
--- a/drivers/accel/qaic/qaic_control.c
+++ b/drivers/accel/qaic/qaic_control.c
@@ -17,6 +17,7 @@
#include <linux/overflow.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
+#include <linux/sched/signal.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/workqueue.h>
@@ -655,8 +656,9 @@ static int encode_activate(struct qaic_device *qdev, void *trans, struct wrapper
return -EINVAL;
nelem = in_trans->queue_size;
- size = (get_dbc_req_elem_size() + get_dbc_rsp_elem_size()) * nelem;
- if (size / nelem != get_dbc_req_elem_size() + get_dbc_rsp_elem_size())
+ if (check_mul_overflow((u32)(get_dbc_req_elem_size() + get_dbc_rsp_elem_size()),
+ nelem,
+ &size))
return -EINVAL;
if (size + QAIC_DBC_Q_GAP + QAIC_DBC_Q_BUF_ALIGN < size)
@@ -810,7 +812,7 @@ static int encode_message(struct qaic_device *qdev, struct manage_msg *user_msg,
}
if (ret)
- break;
+ goto out;
}
if (user_len != user_msg->len)
@@ -1079,7 +1081,6 @@ static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u
list_for_each_entry(w, &wrappers->list, list) {
kref_get(&w->ref_count);
- retry_count = 0;
ret = mhi_queue_buf(qdev->cntl_ch, DMA_TO_DEVICE, &w->msg, w->len,
list_is_last(&w->list, &wrappers->list) ? MHI_EOT : MHI_CHAIN);
if (ret) {
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index c4f117edb266..703ef0ce9da1 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -18,6 +18,7 @@
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>
+#include <linux/string.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/wait.h>
@@ -165,7 +166,7 @@ static void free_slice(struct kref *kref)
drm_gem_object_put(&slice->bo->base);
sg_free_table(slice->sgt);
kfree(slice->sgt);
- kfree(slice->reqs);
+ kvfree(slice->reqs);
kfree(slice);
}
@@ -404,7 +405,7 @@ static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo,
goto free_sgt;
}
- slice->reqs = kcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL);
+ slice->reqs = kvcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL);
if (!slice->reqs) {
ret = -ENOMEM;
goto free_slice;
@@ -430,7 +431,7 @@ static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo,
return 0;
free_req:
- kfree(slice->reqs);
+ kvfree(slice->reqs);
free_slice:
kfree(slice);
free_sgt:
@@ -643,8 +644,36 @@ static void qaic_free_object(struct drm_gem_object *obj)
kfree(bo);
}
+static struct sg_table *qaic_get_sg_table(struct drm_gem_object *obj)
+{
+ struct qaic_bo *bo = to_qaic_bo(obj);
+ struct scatterlist *sg, *sg_in;
+ struct sg_table *sgt, *sgt_in;
+ int i;
+
+ sgt_in = bo->sgt;
+
+ sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt)
+ return ERR_PTR(-ENOMEM);
+
+ if (sg_alloc_table(sgt, sgt_in->orig_nents, GFP_KERNEL)) {
+ kfree(sgt);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ sg = sgt->sgl;
+ for_each_sgtable_sg(sgt_in, sg_in, i) {
+ memcpy(sg, sg_in, sizeof(*sg));
+ sg = sg_next(sg);
+ }
+
+ return sgt;
+}
+
static const struct drm_gem_object_funcs qaic_gem_funcs = {
.free = qaic_free_object,
+ .get_sg_table = qaic_get_sg_table,
.print_info = qaic_gem_print_info,
.mmap = qaic_gem_object_mmap,
.vm_ops = &drm_vm_ops,
@@ -953,8 +982,9 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
if (args->hdr.count == 0)
return -EINVAL;
- arg_size = args->hdr.count * sizeof(*slice_ent);
- if (arg_size / args->hdr.count != sizeof(*slice_ent))
+ if (check_mul_overflow((unsigned long)args->hdr.count,
+ (unsigned long)sizeof(*slice_ent),
+ &arg_size))
return -EINVAL;
if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE))
@@ -984,18 +1014,12 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
user_data = u64_to_user_ptr(args->data);
- slice_ent = kzalloc(arg_size, GFP_KERNEL);
- if (!slice_ent) {
- ret = -EINVAL;
+ slice_ent = memdup_user(user_data, arg_size);
+ if (IS_ERR(slice_ent)) {
+ ret = PTR_ERR(slice_ent);
goto unlock_dev_srcu;
}
- ret = copy_from_user(slice_ent, user_data, arg_size);
- if (ret) {
- ret = -EFAULT;
- goto free_slice_ent;
- }
-
obj = drm_gem_object_lookup(file_priv, args->hdr.handle);
if (!obj) {
ret = -ENOENT;
@@ -1300,8 +1324,6 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
int usr_rcu_id, qdev_rcu_id;
struct qaic_device *qdev;
struct qaic_user *usr;
- u8 __user *user_data;
- unsigned long n;
u64 received_ts;
u32 queue_level;
u64 submit_ts;
@@ -1314,20 +1336,12 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
received_ts = ktime_get_ns();
size = is_partial ? sizeof(struct qaic_partial_execute_entry) : sizeof(*exec);
- n = (unsigned long)size * args->hdr.count;
- if (args->hdr.count == 0 || n / args->hdr.count != size)
+ if (args->hdr.count == 0)
return -EINVAL;
- user_data = u64_to_user_ptr(args->data);
-
- exec = kcalloc(args->hdr.count, size, GFP_KERNEL);
- if (!exec)
- return -ENOMEM;
-
- if (copy_from_user(exec, user_data, n)) {
- ret = -EFAULT;
- goto free_exec;
- }
+ exec = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, size);
+ if (IS_ERR(exec))
+ return PTR_ERR(exec);
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
@@ -1396,7 +1410,6 @@ unlock_dev_srcu:
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
unlock_usr_srcu:
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
-free_exec:
kfree(exec);
return ret;
}
@@ -1749,7 +1762,8 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
struct qaic_device *qdev;
struct qaic_user *usr;
struct qaic_bo *bo;
- int ret, i;
+ int ret = 0;
+ int i;
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
@@ -1770,18 +1784,12 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
goto unlock_dev_srcu;
}
- ent = kcalloc(args->hdr.count, sizeof(*ent), GFP_KERNEL);
- if (!ent) {
- ret = -EINVAL;
+ ent = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, sizeof(*ent));
+ if (IS_ERR(ent)) {
+ ret = PTR_ERR(ent);
goto unlock_dev_srcu;
}
- ret = copy_from_user(ent, u64_to_user_ptr(args->data), args->hdr.count * sizeof(*ent));
- if (ret) {
- ret = -EFAULT;
- goto free_ent;
- }
-
for (i = 0; i < args->hdr.count; i++) {
obj = drm_gem_object_lookup(file_priv, ent[i].handle);
if (!obj) {
@@ -1789,6 +1797,16 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
goto free_ent;
}
bo = to_qaic_bo(obj);
+ if (!bo->sliced) {
+ drm_gem_object_put(obj);
+ ret = -EINVAL;
+ goto free_ent;
+ }
+ if (bo->dbc->id != args->hdr.dbc_id) {
+ drm_gem_object_put(obj);
+ ret = -EINVAL;
+ goto free_ent;
+ }
/*
* perf stats ioctl is called before wait ioctl is complete then
* the latency information is invalid.
diff --git a/drivers/accel/qaic/qaic_ras.c b/drivers/accel/qaic/qaic_ras.c
index 914ffc4a9970..f1d52a710136 100644
--- a/drivers/accel/qaic/qaic_ras.c
+++ b/drivers/accel/qaic/qaic_ras.c
@@ -514,21 +514,21 @@ static ssize_t ce_count_show(struct device *dev, struct device_attribute *attr,
{
struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
- return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ce_count);
+ return sysfs_emit(buf, "%d\n", qdev->ce_count);
}
static ssize_t ue_count_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
- return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_count);
+ return sysfs_emit(buf, "%d\n", qdev->ue_count);
}
static ssize_t ue_nonfatal_count_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
- return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_nf_count);
+ return sysfs_emit(buf, "%d\n", qdev->ue_nf_count);
}
static DEVICE_ATTR_RO(ce_count);
diff --git a/drivers/accel/qaic/sahara.c b/drivers/accel/qaic/sahara.c
index 3ebcc1f7ff58..d5d16cd0d50d 100644
--- a/drivers/accel/qaic/sahara.c
+++ b/drivers/accel/qaic/sahara.c
@@ -159,6 +159,7 @@ struct sahara_context {
struct sahara_packet *rx;
struct work_struct fw_work;
struct work_struct dump_work;
+ struct work_struct read_data_work;
struct mhi_device *mhi_dev;
const char * const *image_table;
u32 table_size;
@@ -174,7 +175,10 @@ struct sahara_context {
u64 dump_image_offset;
void *mem_dump_freespace;
u64 dump_images_left;
+ u32 read_data_offset;
+ u32 read_data_length;
bool is_mem_dump_mode;
+ bool non_streaming;
};
static const char * const aic100_image_table[] = {
@@ -216,6 +220,11 @@ static const char * const aic200_image_table[] = {
[75] = "qcom/aic200/pvs.bin",
};
+static bool is_streaming(struct sahara_context *context)
+{
+ return !context->non_streaming;
+}
+
static int sahara_find_image(struct sahara_context *context, u32 image_id)
{
int ret;
@@ -265,6 +274,8 @@ static void sahara_send_reset(struct sahara_context *context)
int ret;
context->is_mem_dump_mode = false;
+ context->read_data_offset = 0;
+ context->read_data_length = 0;
context->tx[0]->cmd = cpu_to_le32(SAHARA_RESET_CMD);
context->tx[0]->length = cpu_to_le32(SAHARA_RESET_LENGTH);
@@ -319,9 +330,39 @@ static void sahara_hello(struct sahara_context *context)
dev_err(&context->mhi_dev->dev, "Unable to send hello response %d\n", ret);
}
+static int read_data_helper(struct sahara_context *context, int buf_index)
+{
+ enum mhi_flags mhi_flag;
+ u32 pkt_data_len;
+ int ret;
+
+ pkt_data_len = min(context->read_data_length, SAHARA_PACKET_MAX_SIZE);
+
+ memcpy(context->tx[buf_index],
+ &context->firmware->data[context->read_data_offset],
+ pkt_data_len);
+
+ context->read_data_offset += pkt_data_len;
+ context->read_data_length -= pkt_data_len;
+
+ if (is_streaming(context) || !context->read_data_length)
+ mhi_flag = MHI_EOT;
+ else
+ mhi_flag = MHI_CHAIN;
+
+ ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE,
+ context->tx[buf_index], pkt_data_len, mhi_flag);
+ if (ret) {
+ dev_err(&context->mhi_dev->dev, "Unable to send read_data response %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static void sahara_read_data(struct sahara_context *context)
{
- u32 image_id, data_offset, data_len, pkt_data_len;
+ u32 image_id, data_offset, data_len;
int ret;
int i;
@@ -357,7 +398,7 @@ static void sahara_read_data(struct sahara_context *context)
* and is not needed here on error.
*/
- if (data_len > SAHARA_TRANSFER_MAX_SIZE) {
+ if (context->non_streaming && data_len > SAHARA_TRANSFER_MAX_SIZE) {
dev_err(&context->mhi_dev->dev, "Malformed read_data packet - data len %d exceeds max xfer size %d\n",
data_len, SAHARA_TRANSFER_MAX_SIZE);
sahara_send_reset(context);
@@ -378,22 +419,18 @@ static void sahara_read_data(struct sahara_context *context)
return;
}
- for (i = 0; i < SAHARA_NUM_TX_BUF && data_len; ++i) {
- pkt_data_len = min(data_len, SAHARA_PACKET_MAX_SIZE);
-
- memcpy(context->tx[i], &context->firmware->data[data_offset], pkt_data_len);
+ context->read_data_offset = data_offset;
+ context->read_data_length = data_len;
- data_offset += pkt_data_len;
- data_len -= pkt_data_len;
+ if (is_streaming(context)) {
+ schedule_work(&context->read_data_work);
+ return;
+ }
- ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE,
- context->tx[i], pkt_data_len,
- !data_len ? MHI_EOT : MHI_CHAIN);
- if (ret) {
- dev_err(&context->mhi_dev->dev, "Unable to send read_data response %d\n",
- ret);
- return;
- }
+ for (i = 0; i < SAHARA_NUM_TX_BUF && context->read_data_length; ++i) {
+ ret = read_data_helper(context, i);
+ if (ret)
+ break;
}
}
@@ -538,6 +575,7 @@ static void sahara_parse_dump_table(struct sahara_context *context)
struct sahara_memory_dump_meta_v1 *dump_meta;
u64 table_nents;
u64 dump_length;
+ u64 mul_bytes;
int ret;
u64 i;
@@ -551,8 +589,9 @@ static void sahara_parse_dump_table(struct sahara_context *context)
dev_table[i].description[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0;
dev_table[i].filename[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0;
- dump_length = size_add(dump_length, le64_to_cpu(dev_table[i].length));
- if (dump_length == SIZE_MAX) {
+ if (check_add_overflow(dump_length,
+ le64_to_cpu(dev_table[i].length),
+ &dump_length)) {
/* Discard the dump */
sahara_send_reset(context);
return;
@@ -568,14 +607,17 @@ static void sahara_parse_dump_table(struct sahara_context *context)
dev_table[i].filename);
}
- dump_length = size_add(dump_length, sizeof(*dump_meta));
- if (dump_length == SIZE_MAX) {
+ if (check_add_overflow(dump_length, (u64)sizeof(*dump_meta), &dump_length)) {
/* Discard the dump */
sahara_send_reset(context);
return;
}
- dump_length = size_add(dump_length, size_mul(sizeof(*image_out_table), table_nents));
- if (dump_length == SIZE_MAX) {
+ if (check_mul_overflow((u64)sizeof(*image_out_table), table_nents, &mul_bytes)) {
+ /* Discard the dump */
+ sahara_send_reset(context);
+ return;
+ }
+ if (check_add_overflow(dump_length, mul_bytes, &dump_length)) {
/* Discard the dump */
sahara_send_reset(context);
return;
@@ -615,7 +657,7 @@ static void sahara_parse_dump_table(struct sahara_context *context)
/* Request the first chunk of the first image */
context->dump_image = &image_out_table[0];
- dump_length = min(context->dump_image->length, SAHARA_READ_MAX_SIZE);
+ dump_length = min_t(u64, context->dump_image->length, SAHARA_READ_MAX_SIZE);
/* Avoid requesting EOI sized data so that we can identify errors */
if (dump_length == SAHARA_END_OF_IMAGE_LENGTH)
dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2;
@@ -663,7 +705,7 @@ static void sahara_parse_dump_image(struct sahara_context *context)
/* Get next image chunk */
dump_length = context->dump_image->length - context->dump_image_offset;
- dump_length = min(dump_length, SAHARA_READ_MAX_SIZE);
+ dump_length = min_t(u64, dump_length, SAHARA_READ_MAX_SIZE);
/* Avoid requesting EOI sized data so that we can identify errors */
if (dump_length == SAHARA_END_OF_IMAGE_LENGTH)
dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2;
@@ -742,6 +784,13 @@ error:
sahara_send_reset(context);
}
+static void sahara_read_data_processing(struct work_struct *work)
+{
+ struct sahara_context *context = container_of(work, struct sahara_context, read_data_work);
+
+ read_data_helper(context, 0);
+}
+
static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
{
struct sahara_context *context;
@@ -756,34 +805,56 @@ static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_
if (!context->rx)
return -ENOMEM;
+ if (!strcmp(mhi_dev->mhi_cntrl->name, "AIC200")) {
+ context->image_table = aic200_image_table;
+ context->table_size = ARRAY_SIZE(aic200_image_table);
+ } else {
+ context->image_table = aic100_image_table;
+ context->table_size = ARRAY_SIZE(aic100_image_table);
+ context->non_streaming = true;
+ }
+
/*
- * AIC100 defines SAHARA_TRANSFER_MAX_SIZE as the largest value it
- * will request for READ_DATA. This is larger than
- * SAHARA_PACKET_MAX_SIZE, and we need 9x SAHARA_PACKET_MAX_SIZE to
- * cover SAHARA_TRANSFER_MAX_SIZE. When the remote side issues a
- * READ_DATA, it requires a transfer of the exact size requested. We
- * can use MHI_CHAIN to link multiple buffers into a single transfer
- * but the remote side will not consume the buffers until it sees an
- * EOT, thus we need to allocate enough buffers to put in the tx fifo
- * to cover an entire READ_DATA request of the max size.
+ * There are two firmware implementations for READ_DATA handling.
+ * The older "SBL" implementation defines a Sahara transfer size, and
+ * expects that the response is a single transport transfer. If the
+ * FW wants to transfer a file that is larger than the transfer size,
+ * the FW will issue multiple READ_DATA commands. For this
+ * implementation, we need to allocate enough buffers to contain the
+ * entire Sahara transfer size.
+ *
+ * The newer "XBL" implementation does not define a maximum transfer
+ * size and instead expects the data to be streamed over using the
+ * transport level MTU. The FW will issue a single READ_DATA command
+ * of whatever size, and consume multiple transport level transfers
+ * until the expected amount of data is consumed. For this
+ * implementation we only need a single buffer of the transport MTU
+ * but we'll need to be able to use it multiple times for a single
+ * READ_DATA request.
+ *
+ * AIC100 is the SBL implementation and defines SAHARA_TRANSFER_MAX_SIZE
+ * and we need 9x SAHARA_PACKET_MAX_SIZE to cover that. We can use
+ * MHI_CHAIN to link multiple buffers into a single transfer but the
+ * remote side will not consume the buffers until it sees an EOT, thus
+ * we need to allocate enough buffers to put in the tx fifo to cover an
+ * entire READ_DATA request of the max size.
+ *
+ * AIC200 is the XBL implementation, and so a single buffer will work.
*/
for (i = 0; i < SAHARA_NUM_TX_BUF; ++i) {
- context->tx[i] = devm_kzalloc(&mhi_dev->dev, SAHARA_PACKET_MAX_SIZE, GFP_KERNEL);
+ context->tx[i] = devm_kzalloc(&mhi_dev->dev,
+ SAHARA_PACKET_MAX_SIZE,
+ GFP_KERNEL);
if (!context->tx[i])
return -ENOMEM;
+ if (is_streaming(context))
+ break;
}
context->mhi_dev = mhi_dev;
INIT_WORK(&context->fw_work, sahara_processing);
INIT_WORK(&context->dump_work, sahara_dump_processing);
-
- if (!strcmp(mhi_dev->mhi_cntrl->name, "AIC200")) {
- context->image_table = aic200_image_table;
- context->table_size = ARRAY_SIZE(aic200_image_table);
- } else {
- context->image_table = aic100_image_table;
- context->table_size = ARRAY_SIZE(aic100_image_table);
- }
+ INIT_WORK(&context->read_data_work, sahara_read_data_processing);
context->active_image_id = SAHARA_IMAGE_ID_NONE;
dev_set_drvdata(&mhi_dev->dev, context);
@@ -814,6 +885,10 @@ static void sahara_mhi_remove(struct mhi_device *mhi_dev)
static void sahara_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
{
+ struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev);
+
+ if (!mhi_result->transaction_status && context->read_data_length && is_streaming(context))
+ schedule_work(&context->read_data_work);
}
static void sahara_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)