Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_request.h')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_request.h | 76
1 file changed, 44 insertions(+), 32 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 6c72bd8d9423..974bd7bcc801 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -28,6 +28,7 @@
#include <linux/fence.h>
#include "i915_gem.h"
+#include "i915_sw_fence.h"
struct intel_wait {
struct rb_node node;
@@ -82,26 +83,32 @@ struct drm_i915_gem_request {
struct intel_ring *ring;
struct intel_signal_node signaling;
+ struct i915_sw_fence submit;
+ wait_queue_t submitq;
+
/** GEM sequence number associated with the previous request,
* when the HWS breadcrumb is equal to this the GPU is processing
* this request.
*/
u32 previous_seqno;
- /** Position in the ringbuffer of the start of the request */
+ /** Position in the ring of the start of the request */
u32 head;
/**
- * Position in the ringbuffer of the start of the postfix.
- * This is required to calculate the maximum available ringbuffer
- * space without overwriting the postfix.
+ * Position in the ring of the start of the postfix.
+ * This is required to calculate the maximum available ring space
+ * without overwriting the postfix.
*/
u32 postfix;
- /** Position in the ringbuffer of the end of the whole request */
+ /** Position in the ring of the end of the whole request */
u32 tail;
- /** Preallocate space in the ringbuffer for the emitting the request */
+ /** Position in the ring of the end of any workarounds after the tail */
+ u32 wa_tail;
+
+ /** Preallocate space in the ring for emitting the request */
u32 reserved_space;
/**
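[The renamed comments above reflect a terminology change (the structure is now struct intel_ring, so "ring" rather than "ringbuffer"), and the new wa_tail records where any post-tail workarounds end. As a hedged sketch of how such offsets are typically consumed, assuming a power-of-two ring size — this helper is illustrative, not the driver's code; compare the kernel's CIRC_SPACE() macro:

/* Illustrative only: free bytes between a consumer offset (e.g. the
 * oldest request's postfix, which must not be overwritten) and the
 * producer offset (the new tail) in a power-of-two sized ring.
 */
static unsigned int ring_space(unsigned int consumer,
			       unsigned int producer,
			       unsigned int size)
{
	/* Keep one byte unused so that full and empty are distinct. */
	return (consumer - producer - 1) & (size - 1);
}

With the fields above, the consumer would be an outstanding request's postfix and the producer the new tail, which is why the postfix position must be tracked at all.]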
@@ -134,27 +141,8 @@ struct drm_i915_gem_request {
/** file_priv list entry for this request */
struct list_head client_list;
- /**
- * The ELSP only accepts two elements at a time, so we queue
- * context/tail pairs on a given queue (ring->execlist_queue) until the
- * hardware is available. The queue serves a double purpose: we also use
- * it to keep track of the up to 2 contexts currently in the hardware
- * (usually one in execution and the other queued up by the GPU): We
- * only remove elements from the head of the queue when the hardware
- * informs us that an element has been completed.
- *
- * All accesses to the queue are mediated by a spinlock
- * (ring->execlist_lock).
- */
-
- /** Execlist link in the submission queue.*/
+ /** Link in the execlist submission queue, guarded by execlist_lock. */
struct list_head execlist_link;
-
- /** Execlists no. of times this request has been sent to the ELSP */
- int elsp_submitted;
-
- /** Execlists context hardware id. */
- unsigned int ctx_hw_id;
};
extern const struct fence_ops i915_fence_ops;
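[The long ELSP comment is folded into a single line: execlist_link lives on the engine's submission queue and every access is mediated by execlist_lock. A sketch of that pattern — the engine pointer and queue name follow the old comment but are assumptions here, not the driver's exact code:

/* Illustrative only: queue a request for execlist submission.
 * All manipulation of execlist_link happens under execlist_lock.
 */
spin_lock(&engine->execlist_lock);
list_add_tail(&request->execlist_link, &engine->execlist_queue);
spin_unlock(&engine->execlist_lock);]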
@@ -222,6 +210,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
*pdst = src;
}
+int
+i915_gem_request_await_object(struct drm_i915_gem_request *to,
+ struct drm_i915_gem_object *obj,
+ bool write);
+
void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
#define i915_add_request(req) \
__i915_add_request(req, true)
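[i915_gem_request_await_object() gives callers a single entry point for ordering a new request behind an object's outstanding work before submission. A sketch of the expected call pattern, with error handling trimmed and the req/obj variables assumed:

/* Illustrative only: wait for rendering to obj (as a write, so both
 * readers and the last writer), then emit and queue the request.
 */
err = i915_gem_request_await_object(req, obj, true);
if (err)
	return err;

i915_add_request(req);]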
@@ -234,10 +227,12 @@ struct intel_rps_client;
#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
int i915_wait_request(struct drm_i915_gem_request *req,
- bool interruptible,
+ unsigned int flags,
s64 *timeout,
struct intel_rps_client *rps)
__attribute__((nonnull(1)));
+#define I915_WAIT_INTERRUPTIBLE BIT(0)
+#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */
static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
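[With the bool parameter replaced by a flags bitmask, callers now state explicitly both whether the wait is interruptible and whether struct_mutex is held; I915_WAIT_LOCKED additionally opts the wait in to GPU-reset handling. A sketch of an unlocked wait, with the req variable assumed:

/* Illustrative only: interruptible wait without struct_mutex held;
 * no I915_WAIT_LOCKED, so GPU resets are not handled by this wait.
 */
ret = i915_wait_request(req, I915_WAIT_INTERRUPTIBLE, NULL, NULL);]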
@@ -472,6 +467,19 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
if (!request || i915_gem_request_completed(request))
return NULL;
+ /* An especially silly compiler could decide to recompute the
+ * result of i915_gem_request_completed, more specifically
+ * re-emit the load for request->fence.seqno. A race would catch
+ * a later seqno value, which could flip the result from true to
+ * false. Which means part of the instructions below might not
+ * be executed, while later on instructions are executed. Due to
+ * barriers within the refcounting the inconsistency can't reach
+ * past the call to i915_gem_request_get_rcu, but not executing
+ * that while still executing i915_gem_request_put() creates
+ * havoc enough. Prevent this with a compiler barrier.
+ */
+ barrier();
+
request = i915_gem_request_get_rcu(request);
/* What stops the following rcu_access_pointer() from occurring
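[The hazard the new comment describes reduces to a re-loaded shared variable. A compressed sketch, with all names hypothetical, standing in for the seqno check and the get/put pair:

/* Illustrative only: completed() re-reads a shared seqno, so without
 * barrier() the compiler may recompute it below, and the two uses
 * can disagree; the get must act on the same answer as the early-out.
 */
if (completed(request))
	return NULL;
barrier();	/* do not re-derive the check below this point */
request = request_get_rcu(request);]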
@@ -578,13 +586,15 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
if (!request)
return 0;
- return i915_wait_request(request, true, NULL, NULL);
+ return i915_wait_request(request,
+ I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+ NULL, NULL);
}
/**
* i915_gem_active_wait_unlocked - waits until the request is completed
* @active - the active request on which to wait
- * @interruptible - whether the wait can be woken by a userspace signal
+ * @flags - how to wait
* @timeout - how long to wait at most
* @rps - userspace client to charge for a waitboost
*
@@ -605,7 +615,7 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
*/
static inline int
i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
- bool interruptible,
+ unsigned int flags,
s64 *timeout,
struct intel_rps_client *rps)
{
@@ -614,7 +624,7 @@ i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
request = i915_gem_active_get_unlocked(active);
if (request) {
- ret = i915_wait_request(request, interruptible, timeout, rps);
+ ret = i915_wait_request(request, flags, timeout, rps);
i915_gem_request_put(request);
}
@@ -641,7 +651,9 @@ i915_gem_active_retire(struct i915_gem_active *active,
if (!request)
return 0;
- ret = i915_wait_request(request, true, NULL, NULL);
+ ret = i915_wait_request(request,
+ I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+ NULL, NULL);
if (ret)
return ret;
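[Both i915_gem_active_wait() and i915_gem_active_retire() run under struct_mutex, so they pass I915_WAIT_LOCKED alongside I915_WAIT_INTERRUPTIBLE and keep handling GPU resets as the old boolean path did. A sketch of a retire call site — the obj->last_write active is an assumption for illustration:

/* Illustrative only: caller already holds struct_mutex, matching the
 * I915_WAIT_LOCKED flag used inside i915_gem_active_retire().
 */
lockdep_assert_held(&dev->struct_mutex);
err = i915_gem_active_retire(&obj->last_write, &dev->struct_mutex);]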