Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_request.h')
 -rw-r--r--  drivers/gpu/drm/i915/i915_gem_request.h | 76
1 file changed, 44 insertions(+), 32 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 6c72bd8d9423..974bd7bcc801 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -28,6 +28,7 @@
 #include <linux/fence.h>
 
 #include "i915_gem.h"
+#include "i915_sw_fence.h"
 
 struct intel_wait {
 	struct rb_node node;
@@ -82,26 +83,32 @@ struct drm_i915_gem_request {
 	struct intel_ring *ring;
 	struct intel_signal_node signaling;
 
+	struct i915_sw_fence submit;
+	wait_queue_t submitq;
+
 	/** GEM sequence number associated with the previous request,
 	 * when the HWS breadcrumb is equal to this the GPU is processing
 	 * this request.
 	 */
 	u32 previous_seqno;
 
-	/** Position in the ringbuffer of the start of the request */
+	/** Position in the ring of the start of the request */
 	u32 head;
 
 	/**
-	 * Position in the ringbuffer of the start of the postfix.
-	 * This is required to calculate the maximum available ringbuffer
-	 * space without overwriting the postfix.
+	 * Position in the ring of the start of the postfix.
+	 * This is required to calculate the maximum available ring space
+	 * without overwriting the postfix.
	 */
 	u32 postfix;
 
-	/** Position in the ringbuffer of the end of the whole request */
+	/** Position in the ring of the end of the whole request */
 	u32 tail;
 
-	/** Preallocate space in the ringbuffer for emitting the request */
+	/** Position in the ring of the end of any workarounds after the tail */
+	u32 wa_tail;
+
+	/** Preallocate space in the ring for emitting the request */
 	u32 reserved_space;
 
 	/**
@@ -134,27 +141,8 @@ struct drm_i915_gem_request {
 	/** file_priv list entry for this request */
 	struct list_head client_list;
 
-	/**
-	 * The ELSP only accepts two elements at a time, so we queue
-	 * context/tail pairs on a given queue (ring->execlist_queue) until the
-	 * hardware is available. The queue serves a double purpose: we also use
-	 * it to keep track of the up to 2 contexts currently in the hardware
-	 * (usually one in execution and the other queued up by the GPU): We
-	 * only remove elements from the head of the queue when the hardware
-	 * informs us that an element has been completed.
-	 *
-	 * All accesses to the queue are mediated by a spinlock
-	 * (ring->execlist_lock).
-	 */
-
-	/** Execlist link in the submission queue.*/
+	/** Link in the execlist submission queue, guarded by execlist_lock. */
 	struct list_head execlist_link;
-
-	/** Execlists no. of times this request has been sent to the ELSP */
-	int elsp_submitted;
-
-	/** Execlists context hardware id. */
-	unsigned int ctx_hw_id;
 };
 
 extern const struct fence_ops i915_fence_ops;
@@ -222,6 +210,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 	*pdst = src;
 }
 
+int
+i915_gem_request_await_object(struct drm_i915_gem_request *to,
+			      struct drm_i915_gem_object *obj,
+			      bool write);
+
 void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
 	__i915_add_request(req, true)
@@ -234,10 +227,12 @@ struct intel_rps_client;
 #define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
 
 int i915_wait_request(struct drm_i915_gem_request *req,
-		      bool interruptible,
+		      unsigned int flags,
 		      s64 *timeout,
 		      struct intel_rps_client *rps)
 	__attribute__((nonnull(1)));
+#define I915_WAIT_INTERRUPTIBLE BIT(0)
+#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
@@ -472,6 +467,19 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
 		if (!request || i915_gem_request_completed(request))
 			return NULL;
 
+		/* An especially silly compiler could decide to recompute the
+		 * result of i915_gem_request_completed, more specifically
+		 * re-emit the load for request->fence.seqno. A race would catch
+		 * a later seqno value, which could flip the result from true to
+		 * false. Which means part of the instructions below might not
+		 * be executed, while later on instructions are executed. Due to
+		 * barriers within the refcounting the inconsistency can't reach
+		 * past the call to i915_gem_request_get_rcu, but not executing
+		 * that while still executing i915_gem_request_put() creates
+		 * havoc enough. Prevent this with a compiler barrier.
+		 */
+		barrier();
+
 		request = i915_gem_request_get_rcu(request);
 
 		/* What stops the following rcu_access_pointer() from occurring
@@ -578,13 +586,15 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
 	if (!request)
 		return 0;
 
-	return i915_wait_request(request, true, NULL, NULL);
+	return i915_wait_request(request,
+				 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				 NULL, NULL);
 }
 
 /**
  * i915_gem_active_wait_unlocked - waits until the request is completed
  * @active - the active request on which to wait
- * @interruptible - whether the wait can be woken by a userspace signal
+ * @flags - how to wait
  * @timeout - how long to wait at most
  * @rps - userspace client to charge for a waitboost
 *
@@ -605,7 +615,7 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
  */
 static inline int
 i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
-			      bool interruptible,
+			      unsigned int flags,
 			      s64 *timeout,
 			      struct intel_rps_client *rps)
 {
@@ -614,7 +624,7 @@ i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
 
 	request = i915_gem_active_get_unlocked(active);
 	if (request) {
-		ret = i915_wait_request(request, interruptible, timeout, rps);
+		ret = i915_wait_request(request, flags, timeout, rps);
 		i915_gem_request_put(request);
 	}
 
@@ -641,7 +651,9 @@ i915_gem_active_retire(struct i915_gem_active *active,
 	if (!request)
 		return 0;
 
-	ret = i915_wait_request(request, true, NULL, NULL);
+	ret = i915_wait_request(request,
+				 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				 NULL, NULL);
 	if (ret)
 		return ret;
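A note on the new wait API (editorial, not part of the patch): i915_wait_request() now takes a flags bitmask where it previously took a bare interruptible bool, so one argument carries both interruptibility and whether struct_mutex is held. A minimal sketch of the calling convention, using only the declarations from this header; the helper name is invented for illustration:

	/* Hypothetical caller that holds struct_mutex. I915_WAIT_LOCKED
	 * tells the wait it must detect and handle a GPU reset itself,
	 * and I915_WAIT_INTERRUPTIBLE lets a signal abort the wait.
	 */
	static int example_wait_locked(struct drm_i915_gem_request *req)
	{
		return i915_wait_request(req,
					 I915_WAIT_INTERRUPTIBLE |
					 I915_WAIT_LOCKED,
					 NULL,	/* no timeout */
					 NULL);	/* no RPS client to waitboost */
	}

A caller without struct_mutex would pass just I915_WAIT_INTERRUPTIBLE (or 0); i915_gem_active_wait_unlocked() above simply forwards its flags argument through.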

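The barrier() added in __i915_gem_active_get_rcu() deserves a concrete model. Below is a self-contained userspace sketch of the hazard, with invented names (nothing in it exists in i915): when the completion test reads the seqno in place, the compiler may re-emit that load later and in effect repeat the test with a newer value, so code after the test can run half as if it passed and half as if it failed.

	#include <stdint.h>
	#include <stddef.h>

	#define barrier() __asm__ __volatile__("" ::: "memory")

	struct request {
		uint32_t seqno;		/* advanced asynchronously elsewhere */
		int refcount;
	};

	/* Stand-in for i915_gem_request_completed(): reads rq->seqno in
	 * place, so after inlining the compiler need not keep the loaded
	 * value and may load it again wherever the result is reused. */
	static int completed(const struct request *rq, uint32_t hws_seqno)
	{
		return (int32_t)(hws_seqno - rq->seqno) >= 0;
	}

	struct request *try_get(struct request *rq, uint32_t hws_seqno)
	{
		if (completed(rq, hws_seqno))
			return NULL;

		/* Pin the decision above: without this compiler barrier, a
		 * re-emitted seqno load could flip the result mid-function. */
		barrier();

		rq->refcount++;	/* stand-in for i915_gem_request_get_rcu() */
		return rq;
	}

As the patch comment notes, the real code tolerates the race up to i915_gem_request_get_rcu() because the refcounting contains barriers of its own; the compiler barrier only stops the inconsistency from forming before that point.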