Diffstat (limited to 'drivers/gpu/drm/scheduler')
 drivers/gpu/drm/scheduler/sched_entity.c         | 25
 drivers/gpu/drm/scheduler/sched_main.c           | 67
 drivers/gpu/drm/scheduler/tests/mock_scheduler.c |  2
 drivers/gpu/drm/scheduler/tests/sched_tests.h    |  8
 drivers/gpu/drm/scheduler/tests/tests_basic.c    |  4
 5 files changed, 57 insertions(+), 49 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 8867b95ab089..5a4697f636f2 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -285,9 +285,9 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
return 0;
sched = entity->rq->sched;
- /**
- * The client will not queue more IBs during this fini, consume existing
- * queued IBs or discard them on SIGKILL
+ /*
+ * The client will not queue more jobs during this fini - consume
+ * existing queued ones, or discard them on SIGKILL.
*/
if (current->flags & PF_EXITING) {
if (timeout)
@@ -300,7 +300,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
drm_sched_entity_is_idle(entity));
}
- /* For killed process disable any more IBs enqueue right now */
+ /* For a killed process disallow further enqueueing of jobs. */
last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
if ((!last_user || last_user == current->group_leader) &&
(current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
@@ -324,9 +324,9 @@ EXPORT_SYMBOL(drm_sched_entity_flush);
void drm_sched_entity_fini(struct drm_sched_entity *entity)
{
/*
- * If consumption of existing IBs wasn't completed. Forcefully remove
- * them here. Also makes sure that the scheduler won't touch this entity
- * any more.
+ * If consumption of existing jobs wasn't completed forcefully remove
+ * them. Also makes sure that the scheduler won't touch this entity any
+ * more.
*/
drm_sched_entity_kill(entity);
@@ -391,7 +391,8 @@ EXPORT_SYMBOL(drm_sched_entity_set_priority);
* Add a callback to the current dependency of the entity to wake up the
* scheduler when the entity becomes available.
*/
-static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
+static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity,
+ struct drm_sched_job *sched_job)
{
struct drm_gpu_scheduler *sched = entity->rq->sched;
struct dma_fence *fence = entity->dependency;
@@ -421,6 +422,10 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
entity->dependency = fence;
}
+ if (trace_drm_sched_job_unschedulable_enabled() &&
+ !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &entity->dependency->flags))
+ trace_drm_sched_job_unschedulable(sched_job, entity->dependency);
+
if (!dma_fence_add_callback(entity->dependency, &entity->cb,
drm_sched_entity_wakeup))
return true;
@@ -461,10 +466,8 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
while ((entity->dependency =
drm_sched_job_dependency(sched_job, entity))) {
- if (drm_sched_entity_add_dependency_cb(entity)) {
- trace_drm_sched_job_unschedulable(sched_job, entity->dependency);
+ if (drm_sched_entity_add_dependency_cb(entity, sched_job))
return NULL;
- }
}
/* skip jobs from entity that marked guilty */
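The sched_entity.c hunks above move the unschedulable tracepoint into
drm_sched_entity_add_dependency_cb() and guard it with
trace_drm_sched_job_unschedulable_enabled(), so the extra fence-state test
only runs while the tracepoint is actually enabled. A minimal userspace
sketch of that guard pattern (all names are invented stand-ins for the
kernel's generated tracepoint helpers):

    #include <stdbool.h>
    #include <stdio.h>

    static bool tracing_on;                  /* models trace_..._enabled() */

    static bool fence_signaled(void)
    {
            return false;                    /* pretend: still unsignaled */
    }

    static void trace_unschedulable(int job) /* models trace_...() itself */
    {
            printf("job %d blocked on an unsignaled dependency\n", job);
    }

    static void add_dependency_cb(int job)
    {
            /* Check the fence only when somebody is listening, mirroring
             * the trace_..._enabled() guard added in the hunk above. */
            if (tracing_on && !fence_signaled())
                    trace_unschedulable(job);
            /* ... the real function then installs the wakeup callback. */
    }

    int main(void)
    {
            tracing_on = true;
            add_dependency_cb(1);
            return 0;
    }

Passing sched_job into the helper also lets the tracepoint fire before the
callback is armed, instead of after the fact in drm_sched_entity_pop_job().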
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index e2cda28a1af4..46119aacb809 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -349,37 +349,16 @@ static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
}
/**
- * __drm_sched_run_free_queue - enqueue free-job work
+ * drm_sched_run_free_queue - enqueue free-job work
* @sched: scheduler instance
*/
-static void __drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
+static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
{
if (!READ_ONCE(sched->pause_submit))
queue_work(sched->submit_wq, &sched->work_free_job);
}
/**
- * drm_sched_run_free_queue - enqueue free-job work if ready
- * @sched: scheduler instance
- */
-static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
-{
- struct drm_sched_job *job;
-
- job = list_first_entry_or_null(&sched->pending_list,
- struct drm_sched_job, list);
- if (job && dma_fence_is_signaled(&job->s_fence->finished))
- __drm_sched_run_free_queue(sched);
-}
-
-static void drm_sched_run_free_queue_unlocked(struct drm_gpu_scheduler *sched)
-{
- spin_lock(&sched->job_list_lock);
- drm_sched_run_free_queue(sched);
- spin_unlock(&sched->job_list_lock);
-}
-
-/**
* drm_sched_job_done - complete a job
* @s_job: pointer to the job which is done
*
@@ -398,7 +377,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
dma_fence_get(&s_fence->finished);
drm_sched_fence_finished(s_fence, result);
dma_fence_put(&s_fence->finished);
- __drm_sched_run_free_queue(sched);
+ drm_sched_run_free_queue(sched);
}
/**
@@ -1134,12 +1113,16 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
* drm_sched_get_finished_job - fetch the next finished job to be destroyed
*
* @sched: scheduler instance
+ * @have_more: are there more finished jobs on the list
+ *
+ * Informs the caller through @have_more whether there are more finished jobs
+ * besides the returned one.
*
* Returns the next finished job from the pending list (if there is one)
* ready for it to be destroyed.
*/
static struct drm_sched_job *
-drm_sched_get_finished_job(struct drm_gpu_scheduler *sched)
+drm_sched_get_finished_job(struct drm_gpu_scheduler *sched, bool *have_more)
{
struct drm_sched_job *job, *next;
@@ -1147,22 +1130,25 @@ drm_sched_get_finished_job(struct drm_gpu_scheduler *sched)
job = list_first_entry_or_null(&sched->pending_list,
struct drm_sched_job, list);
-
if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
/* remove job from pending_list */
list_del_init(&job->list);
/* cancel this job's TO timer */
cancel_delayed_work(&sched->work_tdr);
- /* make the scheduled timestamp more accurate */
+
+ *have_more = false;
next = list_first_entry_or_null(&sched->pending_list,
typeof(*next), list);
-
if (next) {
+ /* make the scheduled timestamp more accurate */
if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
&next->s_fence->scheduled.flags))
next->s_fence->scheduled.timestamp =
dma_fence_timestamp(&job->s_fence->finished);
+
+ *have_more = dma_fence_is_signaled(&next->s_fence->finished);
+
/* start TO timer for next job */
drm_sched_start_timeout(sched);
}
@@ -1221,12 +1207,15 @@ static void drm_sched_free_job_work(struct work_struct *w)
struct drm_gpu_scheduler *sched =
container_of(w, struct drm_gpu_scheduler, work_free_job);
struct drm_sched_job *job;
+ bool have_more;
- job = drm_sched_get_finished_job(sched);
- if (job)
+ job = drm_sched_get_finished_job(sched, &have_more);
+ if (job) {
sched->ops->free_job(job);
+ if (have_more)
+ drm_sched_run_free_queue(sched);
+ }
- drm_sched_run_free_queue_unlocked(sched);
drm_sched_run_job_queue(sched);
}
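With drm_sched_run_free_queue_unlocked() removed, the free-job worker no
longer retakes job_list_lock just to peek at the pending list:
drm_sched_get_finished_job() reports through @have_more, from the critical
section it already holds, whether the next job has signaled too, and the
worker requeues itself only in that case. A toy single-threaded model of
the idea (invented names, counters in place of the real list and lock):

    #include <stdbool.h>
    #include <stdio.h>

    static int pending = 3;     /* jobs still on the "pending list" */
    static int signaled = 2;    /* of those, the first two are done */

    /* Pop one finished job and report, from the same critical section,
     * whether the job behind it has already finished as well. */
    static bool get_finished_job(int *job, bool *have_more)
    {
            if (!pending || !signaled)
                    return false;
            *job = pending--;
            signaled--;
            *have_more = signaled > 0;
            return true;
    }

    static void free_job_work(void)
    {
            bool have_more = false;
            int job;

            if (get_finished_job(&job, &have_more)) {
                    printf("freed job %d\n", job);
                    if (have_more)
                            free_job_work(); /* models re-queuing the work */
            }
    }

    int main(void)
    {
            free_job_work();
            return 0;
    }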
@@ -1435,6 +1424,22 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
* Prevents reinsertion and marks job_queue as idle,
* it will be removed from the rq in drm_sched_entity_fini()
* eventually
+ *
+ * FIXME:
+ * This lacks the proper spin_lock(&s_entity->lock) and
+ * is, therefore, a race condition. Most notably, it
+ * can race with drm_sched_entity_push_job(). The lock
+ * cannot be taken here, however, because this would
+ * lead to lock inversion -> deadlock.
+ *
+ * The best solution probably is to enforce the
+ * lifetime rule of all entities having to be torn
+ * down before their scheduler. Then, however, locking
+ * could be dropped altogether from this function.
+ *
+ * For now, this remains a potential race in all
+ * drivers that keep entities alive for longer than
+ * the scheduler.
*/
s_entity->stopped = true;
spin_unlock(&rq->lock);
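The new FIXME documents a known race instead of changing behaviour. The
lifetime rule it advocates would make drivers finalize every entity before
its scheduler, roughly as sketched below; drm_sched_entity_fini() and
drm_sched_fini() are the real API, while the surrounding device structure
is hypothetical:

    /* Teardown order per the FIXME: entities first, scheduler last, so
     * drm_sched_fini() can never race with drm_sched_entity_push_job(). */
    static void mydrv_fini(struct mydrv_device *mdev)
    {
            drm_sched_entity_fini(&mdev->entity);
            drm_sched_fini(&mdev->sched);
    }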
diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
index 65acffc3fea8..8e9ae7d980eb 100644
--- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
+++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
@@ -219,7 +219,7 @@ mock_sched_timedout_job(struct drm_sched_job *sched_job)
unsigned long flags;
if (job->flags & DRM_MOCK_SCHED_JOB_DONT_RESET) {
- job->flags &= ~DRM_MOCK_SCHED_JOB_DONT_RESET;
+ job->flags |= DRM_MOCK_SCHED_JOB_RESET_SKIPPED;
return DRM_GPU_SCHED_STAT_NO_HANG;
}
diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h b/drivers/gpu/drm/scheduler/tests/sched_tests.h
index 63d4f2ac7074..7f31d35780cc 100644
--- a/drivers/gpu/drm/scheduler/tests/sched_tests.h
+++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h
@@ -11,7 +11,6 @@
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/list.h>
-#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/types.h>
@@ -95,9 +94,10 @@ struct drm_mock_sched_job {
struct completion done;
-#define DRM_MOCK_SCHED_JOB_DONE 0x1
-#define DRM_MOCK_SCHED_JOB_TIMEDOUT 0x2
-#define DRM_MOCK_SCHED_JOB_DONT_RESET 0x4
+#define DRM_MOCK_SCHED_JOB_DONE 0x1
+#define DRM_MOCK_SCHED_JOB_TIMEDOUT 0x2
+#define DRM_MOCK_SCHED_JOB_DONT_RESET 0x4
+#define DRM_MOCK_SCHED_JOB_RESET_SKIPPED 0x8
unsigned long flags;
struct list_head link;
diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
index 55eb142bd7c5..82a41a456b0a 100644
--- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
+++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
@@ -317,8 +317,8 @@ static void drm_sched_skip_reset(struct kunit *test)
KUNIT_ASSERT_FALSE(test, done);
KUNIT_ASSERT_EQ(test,
- job->flags & DRM_MOCK_SCHED_JOB_DONT_RESET,
- 0);
+ job->flags & DRM_MOCK_SCHED_JOB_RESET_SKIPPED,
+ DRM_MOCK_SCHED_JOB_RESET_SKIPPED);
i = drm_mock_sched_advance(sched, 1);
KUNIT_ASSERT_EQ(test, i, 1);
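Taken together, the two test changes replace a destructive check (the
timeout handler clearing DRM_MOCK_SCHED_JOB_DONT_RESET and the test
asserting it is gone) with a positive marker: the handler now sets
DRM_MOCK_SCHED_JOB_RESET_SKIPPED and leaves the request flag untouched. A
standalone sketch of the resulting flag protocol, with assert() standing
in for the KUnit macros:

    #include <assert.h>

    #define JOB_DONT_RESET    0x4
    #define JOB_RESET_SKIPPED 0x8

    int main(void)
    {
            unsigned long flags = JOB_DONT_RESET;

            /* timeout path, as in mock_sched_timedout_job() above */
            if (flags & JOB_DONT_RESET)
                    flags |= JOB_RESET_SKIPPED;

            /* what the reworked KUnit assertion verifies */
            assert((flags & JOB_RESET_SKIPPED) == JOB_RESET_SKIPPED);
            /* the request flag is no longer consumed as a side effect */
            assert(flags & JOB_DONT_RESET);
            return 0;
    }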