Diffstat (limited to 'drivers/gpu/drm/xe/xe_exec_queue.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue.c  | 229
1 file changed, 160 insertions(+), 69 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index cb5f204c08ed..12adfc3a0547 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -16,6 +16,7 @@
#include "xe_dep_scheduler.h"
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_sriov_vf.h"
#include "xe_hw_engine_class_sysfs.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
@@ -29,6 +30,29 @@
#include "xe_vm.h"
#include "xe_pxp.h"
+/**
+ * DOC: Execution Queue
+ *
+ * An execution queue is an interface to the HW context of execution.
+ * The user creates an execution queue, submits GPU jobs through it and,
+ * in the end, destroys it.
+ *
+ * Execution queues can also be created by XeKMD itself for driver-internal
+ * operations such as object migration.
+ *
+ * An execution queue is associated with a specified HW engine or a group of
+ * engines (belonging to the same tile and engine class), and any GPU job
+ * submitted on the queue will run on one of these engines.
+ *
+ * An execution queue is tied to an address space (VM). It holds a reference
+ * to the associated VM and to the underlying Logical Ring Context(s) (LRCs)
+ * until the queue is destroyed.
+ *
+ * The execution queue sits on top of the submission backend. It transparently
+ * handles whichever backend the platform uses (GuC or Execlists) and the ring
+ * operations that the different engine classes support.
+ */
+
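/*
 * Illustration only; not part of this diff. A minimal userspace sketch of the
 * lifecycle described in the DOC block above, using the xe uAPI from
 * include/uapi/drm/xe_drm.h: create an exec queue for an existing VM, submit
 * jobs through it, then destroy it. The placement chosen here (render class,
 * GT 0) and the way 'fd' and 'vm_id' are obtained are assumptions.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

static int example_exec_queue_lifecycle(int fd, uint32_t vm_id)
{
	struct drm_xe_engine_class_instance instance = {
		.engine_class = DRM_XE_ENGINE_CLASS_RENDER,
		.engine_instance = 0,
		.gt_id = 0,
	};
	struct drm_xe_exec_queue_create create = {
		.width = 1,
		.num_placements = 1,
		.vm_id = vm_id,
		.instances = (uintptr_t)&instance,
	};
	struct drm_xe_exec_queue_destroy destroy = {};
	int err;

	err = ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
	if (err)
		return err;

	/* ... submit GPU jobs with DRM_IOCTL_XE_EXEC on create.exec_queue_id ... */

	destroy.exec_queue_id = create.exec_queue_id;
	return ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &destroy);
}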
enum xe_exec_queue_sched_prop {
XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
XE_EXEC_QUEUE_TIMESLICE = 1,
@@ -161,7 +185,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
return q;
}
-static int __xe_exec_queue_init(struct xe_exec_queue *q)
+static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
{
int i, err;
u32 flags = 0;
@@ -180,17 +204,37 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
flags |= XE_LRC_CREATE_RUNALONE;
}
+ if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL))
+ flags |= XE_LRC_CREATE_USER_CTX;
+
+ err = q->ops->init(q);
+ if (err)
+ return err;
+
+ /*
+ * This must occur after q->ops->init to avoid race conditions during VF
+ * post-migration recovery, as the fixups for the LRC GGTT addresses
+ * depend on the queue being present in the backend tracking structure.
+ *
+ * In addition to the above, we must wait on in-flight GGTT changes to
+ * avoid writing out stale values here. Such a wait only closes the race
+ * if the function can detect migration immediately, from the moment the
+ * vCPU resumes execution.
+ */
for (i = 0; i < q->width; ++i) {
- q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags);
- if (IS_ERR(q->lrc[i])) {
- err = PTR_ERR(q->lrc[i]);
+ struct xe_lrc *lrc;
+
+ xe_gt_sriov_vf_wait_valid_ggtt(q->gt);
+ lrc = xe_lrc_create(q->hwe, q->vm, xe_lrc_ring_size(),
+ q->msix_vec, flags);
+ if (IS_ERR(lrc)) {
+ err = PTR_ERR(lrc);
goto err_lrc;
}
- }
- err = q->ops->init(q);
- if (err)
- goto err_lrc;
+ /* Pairs with READ_ONCE in xe_exec_queue_contexts_hwsp_rebase */
+ WRITE_ONCE(q->lrc[i], lrc);
+ }
return 0;
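/*
 * Illustration only; not part of this diff. The WRITE_ONCE()/READ_ONCE()
 * pairing used above and in xe_exec_queue_contexts_hwsp_rebase(), reduced to
 * its core: the writer publishes a pointer and a concurrent reader either
 * sees NULL (slot not published yet) or the complete pointer, never a torn
 * value. WRITE_ONCE()/READ_ONCE() only guarantee a single, untorn access to
 * the pointer itself; any ordering against the pointed-to object's contents
 * must come from separate synchronization. The names (struct item, slots[])
 * are made up for the example; kernel context (linux/compiler.h) is assumed.
 */
#define NR_SLOTS 4

struct item {
	int payload;
};

static struct item *slots[NR_SLOTS];

static void publish_slot(int i, struct item *it)
{
	it->payload = 42;
	WRITE_ONCE(slots[i], it);
}

static void scan_slots(void)
{
	int i;

	for (i = 0; i < NR_SLOTS; i++) {
		struct item *it = READ_ONCE(slots[i]);

		if (!it)
			continue;	/* not published yet, skip */
		/* ... operate on it->payload ... */
	}
}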
@@ -226,7 +270,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
if (IS_ERR(q))
return q;
- err = __xe_exec_queue_init(q);
+ err = __xe_exec_queue_init(q, flags);
if (err)
goto err_post_alloc;
@@ -343,6 +387,12 @@ void xe_exec_queue_destroy(struct kref *ref)
{
struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
struct xe_exec_queue *eq, *next;
+ int i;
+
+ xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0);
+
+ if (q->ufence_syncobj)
+ drm_syncobj_put(q->ufence_syncobj);
@@ -351,6 +401,9 @@ void xe_exec_queue_destroy(struct kref *ref)
xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
xe_exec_queue_last_fence_put_unlocked(q);
+ for_each_tlb_inval(i)
+ xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i);
+
if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
list_for_each_entry_safe(eq, next, &q->multi_gt_list,
multi_gt_link)
@@ -838,25 +891,6 @@ bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
!(q->flags & EXEC_QUEUE_FLAG_VM);
}
-static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
-{
- return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1;
-}
-
-/**
- * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full
- * @q: The exec_queue
- *
- * Return: True if the exec_queue's ring is full, false otherwise.
- */
-bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
-{
- struct xe_lrc *lrc = q->lrc[0];
- s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES;
-
- return xe_exec_queue_num_job_inflight(q) >= max_job;
-}
-
/**
* xe_exec_queue_is_idle() - Whether an exec_queue is idle.
* @q: The exec_queue
@@ -987,7 +1021,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
struct xe_vm *vm)
{
- if (q->flags & EXEC_QUEUE_FLAG_VM) {
+ if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
+ xe_migrate_job_lock_assert(q);
+ } else if (q->flags & EXEC_QUEUE_FLAG_VM) {
lockdep_assert_held(&vm->lock);
} else {
xe_vm_assert_held(vm);
@@ -1086,32 +1122,104 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
struct dma_fence *fence)
{
xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, !dma_fence_is_container(fence));
xe_exec_queue_last_fence_put(q, vm);
q->last_fence = dma_fence_get(fence);
}
/**
- * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue
+ * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence
* @q: The exec queue
- * @vm: The VM the engine does a bind or exec for
+ * @vm: The VM the engine does a bind for
+ * @type: Either primary or media GT
+ */
+void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type)
+{
+ xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+
+ xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type);
+}
+
+/**
+ * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB
+ * invalidation fence unlocked
+ * @q: The exec queue
+ * @type: Either primary or media GT
+ *
+ * Only safe to be called from xe_exec_queue_destroy().
+ */
+void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
+ unsigned int type)
+{
+ xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+
+ dma_fence_put(q->tlb_inval[type].last_fence);
+ q->tlb_inval[type].last_fence = NULL;
+}
+
+/**
+ * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind for
+ * @type: Either primary or media GT
*
- * Returns:
- * -ETIME if there exists an unsignalled last fence dependency, zero otherwise.
+ * Get last fence, takes a ref
+ *
+ * Returns: last fence if not signaled, dma fence stub if signaled
*/
-int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm)
+struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type)
{
struct dma_fence *fence;
- int err = 0;
- fence = xe_exec_queue_last_fence_get(q, vm);
- if (fence) {
- err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ?
- 0 : -ETIME;
- dma_fence_put(fence);
- }
+ xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+ xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
+ EXEC_QUEUE_FLAG_MIGRATE));
- return err;
+ if (q->tlb_inval[type].last_fence &&
+ test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &q->tlb_inval[type].last_fence->flags))
+ xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
+
+ fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub();
+ dma_fence_get(fence);
+ return fence;
+}
+
+/**
+ * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind for
+ * @fence: The fence
+ * @type: Either primary or media GT
+ *
+ * Set the last fence for the TLB invalidation type on the queue. This takes a
+ * reference on the fence; when closing the queue,
+ * xe_exec_queue_tlb_inval_last_fence_put() should be called.
+ */
+void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ struct dma_fence *fence,
+ unsigned int type)
+{
+ xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+ xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
+ EXEC_QUEUE_FLAG_MIGRATE));
+ xe_assert(vm->xe, !dma_fence_is_container(fence));
+
+ xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
+ q->tlb_inval[type].last_fence = dma_fence_get(fence);
}
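/*
 * Hypothetical usage sketch; not part of this diff. How a bind path might
 * pair the get/set helpers above so that TLB invalidations of a given type
 * stay ordered on the queue. Only the xe_exec_queue_tlb_inval_last_fence_*()
 * calls come from this file; issue_tlb_inval() and the rest of the plumbing
 * are assumptions for illustration.
 */
static struct dma_fence *issue_tlb_inval(struct xe_exec_queue *q,
					 struct dma_fence *dep); /* assumed */

static int example_ordered_tlb_inval(struct xe_exec_queue *q, struct xe_vm *vm,
				     unsigned int type)
{
	struct dma_fence *prev, *fence;

	/* Serialize against the previous invalidation of this type. */
	prev = xe_exec_queue_tlb_inval_last_fence_get(q, vm, type);
	fence = issue_tlb_inval(q, prev);	/* assumed to consume prev */
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/*
	 * Record the new fence; _set() takes its own reference, which the
	 * queue drops via _put() or at exec queue destroy time.
	 */
	xe_exec_queue_tlb_inval_last_fence_set(q, vm, fence, type);
	dma_fence_put(fence);

	return 0;
}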
/**
@@ -1128,36 +1236,19 @@ int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch)
int err = 0;
for (i = 0; i < q->width; ++i) {
- xe_lrc_update_memirq_regs_with_address(q->lrc[i], q->hwe, scratch);
- xe_lrc_update_hwctx_regs_with_address(q->lrc[i]);
- err = xe_lrc_setup_wa_bb_with_scratch(q->lrc[i], q->hwe, scratch);
+ struct xe_lrc *lrc;
+
+ /* Pairs with WRITE_ONCE in __xe_exec_queue_init */
+ lrc = READ_ONCE(q->lrc[i]);
+ if (!lrc)
+ continue;
+
+ xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch);
+ xe_lrc_update_hwctx_regs_with_address(lrc);
+ err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch);
if (err)
break;
}
return err;
}
-
-/**
- * xe_exec_queue_jobs_ring_restore - Re-emit ring commands of requests pending on given queue.
- * @q: the &xe_exec_queue struct instance
- */
-void xe_exec_queue_jobs_ring_restore(struct xe_exec_queue *q)
-{
- struct xe_gpu_scheduler *sched = &q->guc->sched;
- struct xe_sched_job *job;
-
- /*
- * This routine is used within VF migration recovery. This means
- * using the lock here introduces a restriction: we cannot wait
- * for any GFX HW response while the lock is taken.
- */
- spin_lock(&sched->base.job_list_lock);
- list_for_each_entry(job, &sched->base.pending_list, drm.list) {
- if (xe_sched_job_is_error(job))
- continue;
-
- q->ring_ops->emit_job(job);
- }
- spin_unlock(&sched->base.job_list_lock);
-}