diff options
Diffstat (limited to 'drivers/gpu/drm/scheduler')
-rw-r--r-- | drivers/gpu/drm/scheduler/sched_entity.c | 25 | ||||
-rw-r--r-- | drivers/gpu/drm/scheduler/sched_main.c | 67 | ||||
-rw-r--r-- | drivers/gpu/drm/scheduler/tests/mock_scheduler.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/scheduler/tests/sched_tests.h | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/scheduler/tests/tests_basic.c | 4 |
5 files changed, 57 insertions, 49 deletions
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 8867b95ab089..5a4697f636f2 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -285,9 +285,9 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) return 0; sched = entity->rq->sched; - /** - * The client will not queue more IBs during this fini, consume existing - * queued IBs or discard them on SIGKILL + /* + * The client will not queue more jobs during this fini - consume + * existing queued ones, or discard them on SIGKILL. */ if (current->flags & PF_EXITING) { if (timeout) @@ -300,7 +300,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) drm_sched_entity_is_idle(entity)); } - /* For killed process disable any more IBs enqueue right now */ + /* For a killed process disallow further enqueueing of jobs. */ last_user = cmpxchg(&entity->last_user, current->group_leader, NULL); if ((!last_user || last_user == current->group_leader) && (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) @@ -324,9 +324,9 @@ EXPORT_SYMBOL(drm_sched_entity_flush); void drm_sched_entity_fini(struct drm_sched_entity *entity) { /* - * If consumption of existing IBs wasn't completed. Forcefully remove - * them here. Also makes sure that the scheduler won't touch this entity - * any more. + * If consumption of existing jobs wasn't completed forcefully remove + * them. Also makes sure that the scheduler won't touch this entity any + * more. */ drm_sched_entity_kill(entity); @@ -391,7 +391,8 @@ EXPORT_SYMBOL(drm_sched_entity_set_priority); * Add a callback to the current dependency of the entity to wake up the * scheduler when the entity becomes available. */ -static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) +static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity, + struct drm_sched_job *sched_job) { struct drm_gpu_scheduler *sched = entity->rq->sched; struct dma_fence *fence = entity->dependency; @@ -421,6 +422,10 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) entity->dependency = fence; } + if (trace_drm_sched_job_unschedulable_enabled() && + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &entity->dependency->flags)) + trace_drm_sched_job_unschedulable(sched_job, entity->dependency); + if (!dma_fence_add_callback(entity->dependency, &entity->cb, drm_sched_entity_wakeup)) return true; @@ -461,10 +466,8 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) while ((entity->dependency = drm_sched_job_dependency(sched_job, entity))) { - if (drm_sched_entity_add_dependency_cb(entity)) { - trace_drm_sched_job_unschedulable(sched_job, entity->dependency); + if (drm_sched_entity_add_dependency_cb(entity, sched_job)) return NULL; - } } /* skip jobs from entity that marked guilty */ diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index e2cda28a1af4..46119aacb809 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -349,37 +349,16 @@ static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched) } /** - * __drm_sched_run_free_queue - enqueue free-job work + * drm_sched_run_free_queue - enqueue free-job work * @sched: scheduler instance */ -static void __drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) +static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) { if (!READ_ONCE(sched->pause_submit)) queue_work(sched->submit_wq, &sched->work_free_job); } /** - * drm_sched_run_free_queue - enqueue free-job work if ready - * @sched: scheduler instance - */ -static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) -{ - struct drm_sched_job *job; - - job = list_first_entry_or_null(&sched->pending_list, - struct drm_sched_job, list); - if (job && dma_fence_is_signaled(&job->s_fence->finished)) - __drm_sched_run_free_queue(sched); -} - -static void drm_sched_run_free_queue_unlocked(struct drm_gpu_scheduler *sched) -{ - spin_lock(&sched->job_list_lock); - drm_sched_run_free_queue(sched); - spin_unlock(&sched->job_list_lock); -} - -/** * drm_sched_job_done - complete a job * @s_job: pointer to the job which is done * @@ -398,7 +377,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result) dma_fence_get(&s_fence->finished); drm_sched_fence_finished(s_fence, result); dma_fence_put(&s_fence->finished); - __drm_sched_run_free_queue(sched); + drm_sched_run_free_queue(sched); } /** @@ -1134,12 +1113,16 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) * drm_sched_get_finished_job - fetch the next finished job to be destroyed * * @sched: scheduler instance + * @have_more: are there more finished jobs on the list + * + * Informs the caller through @have_more whether there are more finished jobs + * besides the returned one. * * Returns the next finished job from the pending list (if there is one) * ready for it to be destroyed. */ static struct drm_sched_job * -drm_sched_get_finished_job(struct drm_gpu_scheduler *sched) +drm_sched_get_finished_job(struct drm_gpu_scheduler *sched, bool *have_more) { struct drm_sched_job *job, *next; @@ -1147,22 +1130,25 @@ drm_sched_get_finished_job(struct drm_gpu_scheduler *sched) job = list_first_entry_or_null(&sched->pending_list, struct drm_sched_job, list); - if (job && dma_fence_is_signaled(&job->s_fence->finished)) { /* remove job from pending_list */ list_del_init(&job->list); /* cancel this job's TO timer */ cancel_delayed_work(&sched->work_tdr); - /* make the scheduled timestamp more accurate */ + + *have_more = false; next = list_first_entry_or_null(&sched->pending_list, typeof(*next), list); - if (next) { + /* make the scheduled timestamp more accurate */ if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &next->s_fence->scheduled.flags)) next->s_fence->scheduled.timestamp = dma_fence_timestamp(&job->s_fence->finished); + + *have_more = dma_fence_is_signaled(&next->s_fence->finished); + /* start TO timer for next job */ drm_sched_start_timeout(sched); } @@ -1221,12 +1207,15 @@ static void drm_sched_free_job_work(struct work_struct *w) struct drm_gpu_scheduler *sched = container_of(w, struct drm_gpu_scheduler, work_free_job); struct drm_sched_job *job; + bool have_more; - job = drm_sched_get_finished_job(sched); - if (job) + job = drm_sched_get_finished_job(sched, &have_more); + if (job) { sched->ops->free_job(job); + if (have_more) + drm_sched_run_free_queue(sched); + } - drm_sched_run_free_queue_unlocked(sched); drm_sched_run_job_queue(sched); } @@ -1435,6 +1424,22 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) * Prevents reinsertion and marks job_queue as idle, * it will be removed from the rq in drm_sched_entity_fini() * eventually + * + * FIXME: + * This lacks the proper spin_lock(&s_entity->lock) and + * is, therefore, a race condition. Most notably, it + * can race with drm_sched_entity_push_job(). The lock + * cannot be taken here, however, because this would + * lead to lock inversion -> deadlock. + * + * The best solution probably is to enforce the life + * time rule of all entities having to be torn down + * before their scheduler. Then, however, locking could + * be dropped alltogether from this function. + * + * For now, this remains a potential race in all + * drivers that keep entities alive for longer than + * the scheduler. */ s_entity->stopped = true; spin_unlock(&rq->lock); diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c index 65acffc3fea8..8e9ae7d980eb 100644 --- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c +++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c @@ -219,7 +219,7 @@ mock_sched_timedout_job(struct drm_sched_job *sched_job) unsigned long flags; if (job->flags & DRM_MOCK_SCHED_JOB_DONT_RESET) { - job->flags &= ~DRM_MOCK_SCHED_JOB_DONT_RESET; + job->flags |= DRM_MOCK_SCHED_JOB_RESET_SKIPPED; return DRM_GPU_SCHED_STAT_NO_HANG; } diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h b/drivers/gpu/drm/scheduler/tests/sched_tests.h index 63d4f2ac7074..7f31d35780cc 100644 --- a/drivers/gpu/drm/scheduler/tests/sched_tests.h +++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h @@ -11,7 +11,6 @@ #include <linux/hrtimer.h> #include <linux/ktime.h> #include <linux/list.h> -#include <linux/atomic.h> #include <linux/mutex.h> #include <linux/types.h> @@ -95,9 +94,10 @@ struct drm_mock_sched_job { struct completion done; -#define DRM_MOCK_SCHED_JOB_DONE 0x1 -#define DRM_MOCK_SCHED_JOB_TIMEDOUT 0x2 -#define DRM_MOCK_SCHED_JOB_DONT_RESET 0x4 +#define DRM_MOCK_SCHED_JOB_DONE 0x1 +#define DRM_MOCK_SCHED_JOB_TIMEDOUT 0x2 +#define DRM_MOCK_SCHED_JOB_DONT_RESET 0x4 +#define DRM_MOCK_SCHED_JOB_RESET_SKIPPED 0x8 unsigned long flags; struct list_head link; diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c index 55eb142bd7c5..82a41a456b0a 100644 --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c @@ -317,8 +317,8 @@ static void drm_sched_skip_reset(struct kunit *test) KUNIT_ASSERT_FALSE(test, done); KUNIT_ASSERT_EQ(test, - job->flags & DRM_MOCK_SCHED_JOB_DONT_RESET, - 0); + job->flags & DRM_MOCK_SCHED_JOB_RESET_SKIPPED, + DRM_MOCK_SCHED_JOB_RESET_SKIPPED); i = drm_mock_sched_advance(sched, 1); KUNIT_ASSERT_EQ(test, i, 1); |