Diffstat (limited to 'drivers/accel/ethosu/ethosu_job.c')
| -rw-r--r-- | drivers/accel/ethosu/ethosu_job.c | 497 |
1 files changed, 497 insertions, 0 deletions
diff --git a/drivers/accel/ethosu/ethosu_job.c b/drivers/accel/ethosu/ethosu_job.c
new file mode 100644
index 000000000000..26e7a2f64d71
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_job.c
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+/* Copyright 2025 Arm, Ltd. */
+
+#include <linux/bitfield.h>
+#include <linux/genalloc.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_dma_helper.h>
+#include <drm/drm_print.h>
+#include <drm/ethosu_accel.h>
+
+#include "ethosu_device.h"
+#include "ethosu_drv.h"
+#include "ethosu_gem.h"
+#include "ethosu_job.h"
+
+#define JOB_TIMEOUT_MS 500
+
+static struct ethosu_job *to_ethosu_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct ethosu_job, base);
+}
+
+static const char *ethosu_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "ethosu";
+}
+
+static const char *ethosu_fence_get_timeline_name(struct dma_fence *fence)
+{
+	return "ethosu-npu";
+}
+
+static const struct dma_fence_ops ethosu_fence_ops = {
+	.get_driver_name = ethosu_fence_get_driver_name,
+	.get_timeline_name = ethosu_fence_get_timeline_name,
+};
+
+static void ethosu_job_hw_submit(struct ethosu_device *dev, struct ethosu_job *job)
+{
+	struct drm_gem_dma_object *cmd_bo = to_drm_gem_dma_obj(job->cmd_bo);
+	struct ethosu_validated_cmdstream_info *cmd_info = to_ethosu_bo(job->cmd_bo)->info;
+
+	for (int i = 0; i < job->region_cnt; i++) {
+		struct drm_gem_dma_object *bo;
+		int region = job->region_bo_num[i];
+
+		bo = to_drm_gem_dma_obj(job->region_bo[i]);
+		writel_relaxed(lower_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP(region));
+		writel_relaxed(upper_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP_HI(region));
+		dev_dbg(dev->base.dev, "Region %d base addr = %pad\n", region, &bo->dma_addr);
+	}
+
+	if (job->sram_size) {
+		writel_relaxed(lower_32_bits(dev->sramphys),
+			       dev->regs + NPU_REG_BASEP(ETHOSU_SRAM_REGION));
+		writel_relaxed(upper_32_bits(dev->sramphys),
+			       dev->regs + NPU_REG_BASEP_HI(ETHOSU_SRAM_REGION));
+		dev_dbg(dev->base.dev, "Region %d base addr = %pad (SRAM)\n",
+			ETHOSU_SRAM_REGION, &dev->sramphys);
+	}
+
+	writel_relaxed(lower_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE);
+	writel_relaxed(upper_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE_HI);
+	writel_relaxed(cmd_info->cmd_size, dev->regs + NPU_REG_QSIZE);
+
+	writel(CMD_TRANSITION_TO_RUN, dev->regs + NPU_REG_CMD);
+
+	dev_dbg(dev->base.dev,
+		"Submitted cmd at %pad to core\n", &cmd_bo->dma_addr);
+}
+
+static int ethosu_acquire_object_fences(struct ethosu_job *job)
+{
+	int i, ret;
+	struct drm_gem_object **bos = job->region_bo;
+	struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;
+
+	for (i = 0; i < job->region_cnt; i++) {
+		bool is_write;
+
+		if (!bos[i])
+			break;
+
+		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
+		if (ret)
+			return ret;
+
+		is_write = info->output_region[job->region_bo_num[i]];
+		ret = drm_sched_job_add_implicit_dependencies(&job->base, bos[i],
+							      is_write);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void ethosu_attach_object_fences(struct ethosu_job *job)
+{
+	int i;
+	struct dma_fence *fence = job->inference_done_fence;
+	struct drm_gem_object **bos = job->region_bo;
+	struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;
+
+	for (i = 0; i < job->region_cnt; i++)
+		if (info->output_region[job->region_bo_num[i]])
+			dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
+}
+
+static int ethosu_job_push(struct ethosu_job *job)
+{
+	struct ww_acquire_ctx acquire_ctx;
+	int ret;
+
+	ret = drm_gem_lock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
+	if (ret)
+		return ret;
+
+	ret = ethosu_acquire_object_fences(job);
+	if (ret)
+		goto out;
+
+	ret = pm_runtime_resume_and_get(job->dev->base.dev);
+	if (!ret) {
+		guard(mutex)(&job->dev->sched_lock);
+
+		drm_sched_job_arm(&job->base);
+		job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);
+		kref_get(&job->refcount); /* put by scheduler job completion */
+		drm_sched_entity_push_job(&job->base);
+		ethosu_attach_object_fences(job);
+	}
+
+out:
+	drm_gem_unlock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
+	return ret;
+}
+
+static void ethosu_job_cleanup(struct kref *ref)
+{
+	struct ethosu_job *job = container_of(ref, struct ethosu_job,
+					      refcount);
+	unsigned int i;
+
+	pm_runtime_put_autosuspend(job->dev->base.dev);
+
+	dma_fence_put(job->done_fence);
+	dma_fence_put(job->inference_done_fence);
+
+	for (i = 0; i < job->region_cnt; i++)
+		drm_gem_object_put(job->region_bo[i]);
+
+	drm_gem_object_put(job->cmd_bo);
+
+	kfree(job);
+}
+
+static void ethosu_job_put(struct ethosu_job *job)
+{
+	kref_put(&job->refcount, ethosu_job_cleanup);
+}
+
+static void ethosu_job_free(struct drm_sched_job *sched_job)
+{
+	struct ethosu_job *job = to_ethosu_job(sched_job);
+
+	drm_sched_job_cleanup(sched_job);
+	ethosu_job_put(job);
+}
+
+static struct dma_fence *ethosu_job_run(struct drm_sched_job *sched_job)
+{
+	struct ethosu_job *job = to_ethosu_job(sched_job);
+	struct ethosu_device *dev = job->dev;
+	struct dma_fence *fence = job->done_fence;
+
+	if (unlikely(job->base.s_fence->finished.error))
+		return NULL;
+
+	dma_fence_init(fence, &ethosu_fence_ops, &dev->fence_lock,
+		       dev->fence_context, ++dev->emit_seqno);
+	dma_fence_get(fence);
+
+	scoped_guard(mutex, &dev->job_lock) {
+		dev->in_flight_job = job;
+		ethosu_job_hw_submit(dev, job);
+	}
+
+	return fence;
+}
+
+static void ethosu_job_handle_irq(struct ethosu_device *dev)
+{
+	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);
+
+	if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) {
+		dev_err(dev->base.dev, "Error IRQ - %x\n", status);
+		drm_sched_fault(&dev->sched);
+		return;
+	}
+
+	scoped_guard(mutex, &dev->job_lock) {
+		if (dev->in_flight_job) {
+			dma_fence_signal(dev->in_flight_job->done_fence);
+			dev->in_flight_job = NULL;
+		}
+	}
+}
+
+static irqreturn_t ethosu_job_irq_handler_thread(int irq, void *data)
+{
+	struct ethosu_device *dev = data;
+
+	ethosu_job_handle_irq(dev);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ethosu_job_irq_handler(int irq, void *data)
+{
+	struct ethosu_device *dev = data;
+	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);
+
+	if (!(status & STATUS_IRQ_RAISED))
+		return IRQ_NONE;
+
+	writel_relaxed(CMD_CLEAR_IRQ, dev->regs + NPU_REG_CMD);
+	return IRQ_WAKE_THREAD;
+}
+
+static enum drm_gpu_sched_stat ethosu_job_timedout(struct drm_sched_job *bad)
+{
+	struct ethosu_job *job = to_ethosu_job(bad);
+	struct ethosu_device *dev = job->dev;
+	bool running;
+	u32 *bocmds = to_drm_gem_dma_obj(job->cmd_bo)->vaddr;
+	u32 cmdaddr;
+
+	cmdaddr = readl_relaxed(dev->regs + NPU_REG_QREAD);
+	running = FIELD_GET(STATUS_STATE_RUNNING, readl_relaxed(dev->regs + NPU_REG_STATUS));
+
+	if (running) {
+		int ret;
+		u32 reg;
+
+		ret = readl_relaxed_poll_timeout(dev->regs + NPU_REG_QREAD,
+						 reg,
+						 reg != cmdaddr,
+						 USEC_PER_MSEC, 100 * USEC_PER_MSEC);
+
+		/* If still running and progress is being made, just return */
+		if (!ret)
+			return DRM_GPU_SCHED_STAT_NO_HANG;
+	}
+
+	dev_err(dev->base.dev, "NPU sched timed out: NPU %s, cmdstream offset 0x%x: 0x%x\n",
+		running ? "running" : "stopped",
+		cmdaddr, bocmds[cmdaddr / 4]);
+
+	drm_sched_stop(&dev->sched, bad);
+
+	scoped_guard(mutex, &dev->job_lock)
+		dev->in_flight_job = NULL;
+
+	/* Proceed with reset now. */
+	pm_runtime_force_suspend(dev->base.dev);
+	pm_runtime_force_resume(dev->base.dev);
+
+	/* Restart the scheduler */
+	drm_sched_start(&dev->sched, 0);
+
+	return DRM_GPU_SCHED_STAT_RESET;
+}
+
+static const struct drm_sched_backend_ops ethosu_sched_ops = {
+	.run_job = ethosu_job_run,
+	.timedout_job = ethosu_job_timedout,
+	.free_job = ethosu_job_free
+};
+
+int ethosu_job_init(struct ethosu_device *edev)
+{
+	struct device *dev = edev->base.dev;
+	struct drm_sched_init_args args = {
+		.ops = &ethosu_sched_ops,
+		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
+		.credit_limit = 1,
+		.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
+		.name = dev_name(dev),
+		.dev = dev,
+	};
+	int ret;
+
+	spin_lock_init(&edev->fence_lock);
+	ret = devm_mutex_init(dev, &edev->job_lock);
+	if (ret)
+		return ret;
+	ret = devm_mutex_init(dev, &edev->sched_lock);
+	if (ret)
+		return ret;
+
+	edev->irq = platform_get_irq(to_platform_device(dev), 0);
+	if (edev->irq < 0)
+		return edev->irq;
+
+	ret = devm_request_threaded_irq(dev, edev->irq,
+					ethosu_job_irq_handler,
+					ethosu_job_irq_handler_thread,
+					IRQF_SHARED, KBUILD_MODNAME,
+					edev);
+	if (ret) {
+		dev_err(dev, "failed to request irq\n");
+		return ret;
+	}
+
+	edev->fence_context = dma_fence_context_alloc(1);
+
+	ret = drm_sched_init(&edev->sched, &args);
+	if (ret) {
+		dev_err(dev, "Failed to create scheduler: %d\n", ret);
+		goto err_sched;
+	}
+
+	return 0;
+
+err_sched:
+	drm_sched_fini(&edev->sched);
+	return ret;
+}
+
+void ethosu_job_fini(struct ethosu_device *dev)
+{
+	drm_sched_fini(&dev->sched);
+}
+
+int ethosu_job_open(struct ethosu_file_priv *ethosu_priv)
+{
+	struct ethosu_device *dev = ethosu_priv->edev;
+	struct drm_gpu_scheduler *sched = &dev->sched;
+	int ret;
+
+	ret = drm_sched_entity_init(&ethosu_priv->sched_entity,
+				    DRM_SCHED_PRIORITY_NORMAL,
+				    &sched, 1, NULL);
+	return WARN_ON(ret);
+}
+
+void ethosu_job_close(struct ethosu_file_priv *ethosu_priv)
+{
+	struct drm_sched_entity *entity = &ethosu_priv->sched_entity;
+
+	drm_sched_entity_destroy(entity);
+}
+
+static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
+				   struct drm_ethosu_job *job)
+{
+	struct ethosu_device *edev = to_ethosu_device(dev);
+	struct ethosu_file_priv *file_priv = file->driver_priv;
+	struct ethosu_job *ejob = NULL;
+	struct ethosu_validated_cmdstream_info *cmd_info;
+	int ret = 0;
+
+	/* BO region 2 is reserved if SRAM is used */
+	if (job->region_bo_handles[ETHOSU_SRAM_REGION] && job->sram_size)
+		return -EINVAL;
+
+	if (edev->npu_info.sram_size < job->sram_size)
+		return -EINVAL;
+
+	ejob = kzalloc(sizeof(*ejob), GFP_KERNEL);
+	if (!ejob)
+		return -ENOMEM;
+
+	kref_init(&ejob->refcount);
+
+	ejob->dev = edev;
+	ejob->sram_size = job->sram_size;
+
+	ejob->done_fence = kzalloc(sizeof(*ejob->done_fence), GFP_KERNEL);
+	if (!ejob->done_fence) {
+		ret = -ENOMEM;
+		goto out_cleanup_job;
+	}
+
+	ret = drm_sched_job_init(&ejob->base,
+				 &file_priv->sched_entity,
+				 1, NULL, file->client_id);
+	if (ret)
+		goto out_put_job;
+
+	ejob->cmd_bo = drm_gem_object_lookup(file, job->cmd_bo);
+	if (!ejob->cmd_bo) {
+		ret = -ENOENT;
+		goto out_cleanup_job;
+	}
+	cmd_info = to_ethosu_bo(ejob->cmd_bo)->info;
+	if (!cmd_info) {
+		ret = -EINVAL;
+		goto out_cleanup_job;
+	}
+
+	for (int i = 0; i < NPU_BASEP_REGION_MAX; i++) {
+		struct drm_gem_object *gem;
+
+		/* Can only omit a BO handle if the region is not used or used for SRAM */
+		if (!job->region_bo_handles[i] &&
+		    (!cmd_info->region_size[i] || (i == ETHOSU_SRAM_REGION && job->sram_size)))
+			continue;
+
+		if (job->region_bo_handles[i] && !cmd_info->region_size[i]) {
+			dev_err(dev->dev,
+				"Cmdstream BO handle %d set for unused region %d\n",
+				job->region_bo_handles[i], i);
+			ret = -EINVAL;
+			goto out_cleanup_job;
+		}
+
+		gem = drm_gem_object_lookup(file, job->region_bo_handles[i]);
+		if (!gem) {
+			dev_err(dev->dev,
+				"Invalid BO handle %d for region %d\n",
+				job->region_bo_handles[i], i);
+			ret = -ENOENT;
+			goto out_cleanup_job;
+		}
+
+		ejob->region_bo[ejob->region_cnt] = gem;
+		ejob->region_bo_num[ejob->region_cnt] = i;
+		ejob->region_cnt++;
+
+		if (to_ethosu_bo(gem)->info) {
+			dev_err(dev->dev,
+				"Cmdstream BO handle %d used for region %d\n",
+				job->region_bo_handles[i], i);
+			ret = -EINVAL;
+			goto out_cleanup_job;
+		}
+
+		/* Verify the command stream doesn't have accesses outside the BO */
+		if (cmd_info->region_size[i] > gem->size) {
+			dev_err(dev->dev,
+				"cmd stream region %d size greater than BO size (%llu > %zu)\n",
+				i, cmd_info->region_size[i], gem->size);
+			ret = -EOVERFLOW;
+			goto out_cleanup_job;
+		}
+	}
+	ret = ethosu_job_push(ejob);
+
+out_cleanup_job:
+	if (ret)
+		drm_sched_job_cleanup(&ejob->base);
+out_put_job:
+	ethosu_job_put(ejob);
+
+	return ret;
+}
+
+int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_ethosu_submit *args = data;
+	int ret = 0;
+	unsigned int i = 0;
+
+	if (args->pad) {
+		drm_dbg(dev, "Reserved field in drm_ethosu_submit struct should be 0.\n");
+		return -EINVAL;
+	}
+
+	struct drm_ethosu_job __free(kvfree) *jobs =
+		kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL);
+	if (!jobs)
+		return -ENOMEM;
+
+	if (copy_from_user(jobs,
+			   (void __user *)(uintptr_t)args->jobs,
+			   args->job_count * sizeof(*jobs))) {
+		drm_dbg(dev, "Failed to copy incoming job array\n");
+		return -EFAULT;
+	}
+
+	for (i = 0; i < args->job_count; i++) {
+		ret = ethosu_ioctl_submit_job(dev, file, &jobs[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
