Diffstat (limited to 'drivers/gpu/drm/ttm')
 drivers/gpu/drm/ttm/tests/ttm_bo_test.c          | 28
 drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c | 73
 drivers/gpu/drm/ttm/tests/ttm_device_test.c      | 33
 drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c    | 22
 drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h    |  7
 drivers/gpu/drm/ttm/tests/ttm_mock_manager.c     |  1
 drivers/gpu/drm/ttm/tests/ttm_pool_test.c        | 24
 drivers/gpu/drm/ttm/tests/ttm_resource_test.c    |  5
 drivers/gpu/drm/ttm/ttm_bo.c                     | 67
 drivers/gpu/drm/ttm/ttm_bo_internal.h            |  2
 drivers/gpu/drm/ttm/ttm_bo_util.c                | 38
 drivers/gpu/drm/ttm/ttm_device.c                 |  9
 drivers/gpu/drm/ttm/ttm_module.c                 |  3
 drivers/gpu/drm/ttm/ttm_pool.c                   | 45
 drivers/gpu/drm/ttm/ttm_pool_internal.h          | 25
 drivers/gpu/drm/ttm/ttm_resource.c               | 37
 drivers/gpu/drm/ttm/ttm_tt.c                     | 11
 17 files changed, 248 insertions(+), 182 deletions(-)
diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
index 6c77550c51af..d468f8322072 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
@@ -251,7 +251,7 @@ static void ttm_bo_unreserve_basic(struct kunit *test)
 	ttm_dev = kunit_kzalloc(test, sizeof(*ttm_dev), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, ttm_dev);
 
-	err = ttm_device_kunit_init(priv, ttm_dev, false, false);
+	err = ttm_device_kunit_init(priv, ttm_dev, 0);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	priv->ttm_dev = ttm_dev;
 
@@ -290,7 +290,7 @@ static void ttm_bo_unreserve_pinned(struct kunit *test)
 	ttm_dev = kunit_kzalloc(test, sizeof(*ttm_dev), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, ttm_dev);
 
-	err = ttm_device_kunit_init(priv, ttm_dev, false, false);
+	err = ttm_device_kunit_init(priv, ttm_dev, 0);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	priv->ttm_dev = ttm_dev;
 
@@ -342,7 +342,7 @@ static void ttm_bo_unreserve_bulk(struct kunit *test)
 	resv = kunit_kzalloc(test, sizeof(*resv), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, resv);
 
-	err = ttm_device_kunit_init(priv, ttm_dev, false, false);
+	err = ttm_device_kunit_init(priv, ttm_dev, 0);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	priv->ttm_dev = ttm_dev;
 
@@ -379,7 +379,7 @@ static void ttm_bo_unreserve_bulk(struct kunit *test)
 	dma_resv_fini(resv);
 }
 
-static void ttm_bo_put_basic(struct kunit *test)
+static void ttm_bo_fini_basic(struct kunit *test)
 {
 	struct ttm_test_devices *priv = test->priv;
 	struct ttm_buffer_object *bo;
@@ -394,7 +394,7 @@ static void ttm_bo_put_basic(struct kunit *test)
 	ttm_dev = kunit_kzalloc(test, sizeof(*ttm_dev), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, ttm_dev);
 
-	err = ttm_device_kunit_init(priv, ttm_dev, false, false);
+	err = ttm_device_kunit_init(priv, ttm_dev, 0);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	priv->ttm_dev = ttm_dev;
 
@@ -410,7 +410,7 @@ static void ttm_bo_put_basic(struct kunit *test)
 	dma_resv_unlock(bo->base.resv);
 	KUNIT_EXPECT_EQ(test, err, 0);
 
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 }
 
 static const char *mock_name(struct dma_fence *f)
@@ -423,7 +423,7 @@ static const struct dma_fence_ops mock_fence_ops = {
 	.get_timeline_name = mock_name,
 };
 
-static void ttm_bo_put_shared_resv(struct kunit *test)
+static void ttm_bo_fini_shared_resv(struct kunit *test)
 {
 	struct ttm_test_devices *priv = test->priv;
 	struct ttm_buffer_object *bo;
@@ -437,7 +437,7 @@ static void ttm_bo_put_shared_resv(struct kunit *test)
 	ttm_dev = kunit_kzalloc(test, sizeof(*ttm_dev), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, ttm_dev);
 
-	err = ttm_device_kunit_init(priv, ttm_dev, false, false);
+	err = ttm_device_kunit_init(priv, ttm_dev, 0);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	priv->ttm_dev = ttm_dev;
 
@@ -463,7 +463,7 @@ static void ttm_bo_put_shared_resv(struct kunit *test)
 	bo->type = ttm_bo_type_device;
 	bo->base.resv = external_resv;
 
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 }
 
 static void ttm_bo_pin_basic(struct kunit *test)
@@ -477,7 +477,7 @@ static void ttm_bo_pin_basic(struct kunit *test)
 	ttm_dev = kunit_kzalloc(test, sizeof(*ttm_dev), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, ttm_dev);
 
-	err = ttm_device_kunit_init(priv, ttm_dev, false, false);
+	err = ttm_device_kunit_init(priv, ttm_dev, 0);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	priv->ttm_dev = ttm_dev;
 
@@ -512,7 +512,7 @@ static void ttm_bo_pin_unpin_resource(struct kunit *test)
 	ttm_dev = kunit_kzalloc(test, sizeof(*ttm_dev), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, ttm_dev);
 
-	err = ttm_device_kunit_init(priv, ttm_dev, false, false);
+	err = ttm_device_kunit_init(priv, ttm_dev, 0);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	priv->ttm_dev = ttm_dev;
 
@@ -563,7 +563,7 @@ static void ttm_bo_multiple_pin_one_unpin(struct kunit *test)
 	ttm_dev = kunit_kzalloc(test, sizeof(*ttm_dev), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, ttm_dev);
 
-	err = ttm_device_kunit_init(priv, ttm_dev, false, false);
+	err = ttm_device_kunit_init(priv, ttm_dev, 0);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	priv->ttm_dev = ttm_dev;
 
@@ -616,8 +616,8 @@ static struct kunit_case ttm_bo_test_cases[] = {
 	KUNIT_CASE(ttm_bo_unreserve_basic),
 	KUNIT_CASE(ttm_bo_unreserve_pinned),
 	KUNIT_CASE(ttm_bo_unreserve_bulk),
-	KUNIT_CASE(ttm_bo_put_basic),
-	KUNIT_CASE(ttm_bo_put_shared_resv),
+	KUNIT_CASE(ttm_bo_fini_basic),
+	KUNIT_CASE(ttm_bo_fini_shared_resv),
 	KUNIT_CASE(ttm_bo_pin_basic),
 	KUNIT_CASE(ttm_bo_pin_unpin_resource),
 	KUNIT_CASE(ttm_bo_multiple_pin_one_unpin),
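The rename above tracks the split this series makes between dropping a reference (ttm_bo_put(), which becomes TTM-internal, see ttm_bo_internal.h further down) and finalizing a BO on its last driver-side reference (ttm_bo_fini(), the new exported entry point). A minimal sketch of a driver's GEM free callback after the change; the driver structure and names are illustrative, not taken from this patch:

	static void example_gem_free_object(struct drm_gem_object *obj)
	{
		struct ttm_buffer_object *bo =
			container_of(obj, struct ttm_buffer_object, base);

		/* Final reference from the driver's point of view:
		 * call ttm_bo_fini() instead of the now-internal ttm_bo_put(). */
		ttm_bo_fini(bo);
	}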
diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
index 1bcc67977f48..2eda87882e65 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
@@ -144,7 +144,7 @@ static void ttm_bo_init_reserved_sys_man(struct kunit *test)
 			drm_mm_node_allocated(&bo->base.vma_node.vm_node));
 
 	ttm_resource_free(bo, &bo->resource);
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 }
 
 static void ttm_bo_init_reserved_mock_man(struct kunit *test)
@@ -186,7 +186,7 @@ static void ttm_bo_init_reserved_mock_man(struct kunit *test)
 			drm_mm_node_allocated(&bo->base.vma_node.vm_node));
 
 	ttm_resource_free(bo, &bo->resource);
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 
 	ttm_mock_manager_fini(priv->ttm_dev, mem_type);
 }
@@ -221,7 +221,7 @@ static void ttm_bo_init_reserved_resv(struct kunit *test)
 	KUNIT_EXPECT_PTR_EQ(test, bo->base.resv, &resv);
 
 	ttm_resource_free(bo, &bo->resource);
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 }
 
 static void ttm_bo_validate_basic(struct kunit *test)
@@ -265,7 +265,7 @@ static void ttm_bo_validate_basic(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, bo->resource->placement,
 			DRM_BUDDY_TOPDOWN_ALLOCATION);
 
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 	ttm_mock_manager_fini(priv->ttm_dev, snd_mem);
 }
 
@@ -292,7 +292,7 @@ static void ttm_bo_validate_invalid_placement(struct kunit *test)
 
 	KUNIT_EXPECT_EQ(test, err, -ENOMEM);
 
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 }
 
 static void ttm_bo_validate_failed_alloc(struct kunit *test)
@@ -321,7 +321,7 @@ static void ttm_bo_validate_failed_alloc(struct kunit *test)
 
 	KUNIT_EXPECT_EQ(test, err, -ENOMEM);
 
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 	ttm_bad_manager_fini(priv->ttm_dev, mem_type);
 }
 
@@ -353,7 +353,7 @@ static void ttm_bo_validate_pinned(struct kunit *test)
 	ttm_bo_unpin(bo);
 	dma_resv_unlock(bo->base.resv);
 
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 }
 
 static const struct ttm_bo_validate_test_case ttm_mem_type_cases[] = {
@@ -403,7 +403,7 @@ static void ttm_bo_validate_same_placement(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, err, 0);
 	KUNIT_EXPECT_EQ(test, ctx_val.bytes_moved, 0);
 
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 
 	if (params->mem_type != TTM_PL_SYSTEM)
 		ttm_mock_manager_fini(priv->ttm_dev, params->mem_type);
@@ -452,7 +452,7 @@ static void ttm_bo_validate_busy_placement(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, bo->resource->mem_type, snd_mem);
 	KUNIT_ASSERT_TRUE(test, list_is_singular(&man->lru[bo->priority]));
 
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 	ttm_bad_manager_fini(priv->ttm_dev, fst_mem);
 	ttm_mock_manager_fini(priv->ttm_dev, snd_mem);
 }
@@ -495,7 +495,7 @@ static void ttm_bo_validate_multihop(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, ctx_val.bytes_moved, size * 2);
 	KUNIT_EXPECT_EQ(test, bo->resource->mem_type, final_mem);
 
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 
 	ttm_mock_manager_fini(priv->ttm_dev, fst_mem);
 	ttm_mock_manager_fini(priv->ttm_dev, tmp_mem);
@@ -567,7 +567,7 @@ static void ttm_bo_validate_no_placement_signaled(struct kunit *test)
 		KUNIT_ASSERT_TRUE(test, flags & TTM_TT_FLAG_ZERO_ALLOC);
 	}
 
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 }
 
 static int threaded_dma_resv_signal(void *arg)
@@ -635,7 +635,7 @@ static void ttm_bo_validate_no_placement_not_signaled(struct kunit *test)
 	/* Make sure we have an idle object at this point */
 	dma_resv_wait_timeout(bo->base.resv, usage, false, MAX_SCHEDULE_TIMEOUT);
 
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 }
 
 static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
@@ -652,7 +652,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
 	int err;
 
 	man = ttm_manager_type(priv->ttm_dev, mem_type);
-	man->move = dma_fence_get_stub();
+	man->eviction_fences[0] = dma_fence_get_stub();
 
 	bo = ttm_bo_kunit_init(test, test->priv, size, NULL);
 	bo->type = bo_type;
@@ -668,8 +668,8 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, bo->resource->mem_type, mem_type);
 	KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size);
 
-	ttm_bo_put(bo);
-	dma_fence_put(man->move);
+	ttm_bo_fini(bo);
+	dma_fence_put(man->eviction_fences[0]);
 }
 
 static const struct ttm_bo_validate_test_case ttm_bo_validate_wait_cases[] = {
@@ -733,9 +733,9 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
 	spin_lock_init(&fence_lock);
 
 	man = ttm_manager_type(priv->ttm_dev, fst_mem);
-	man->move = alloc_mock_fence(test);
+	man->eviction_fences[0] = alloc_mock_fence(test);
 
-	task = kthread_create(threaded_fence_signal, man->move, "move-fence-signal");
+	task = kthread_create(threaded_fence_signal, man->eviction_fences[0], "move-fence-signal");
 	if (IS_ERR(task))
 		KUNIT_FAIL(test, "Couldn't create move fence signal task\n");
 
@@ -743,7 +743,8 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
 	err = ttm_bo_validate(bo, placement_val, &ctx_val);
 	dma_resv_unlock(bo->base.resv);
 
-	dma_fence_wait_timeout(man->move, false, MAX_SCHEDULE_TIMEOUT);
+	dma_fence_wait_timeout(man->eviction_fences[0], false, MAX_SCHEDULE_TIMEOUT);
+	man->eviction_fences[0] = NULL;
 
 	KUNIT_EXPECT_EQ(test, err, 0);
 	KUNIT_EXPECT_EQ(test, ctx_val.bytes_moved, size);
@@ -753,7 +754,7 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
 	else
 		KUNIT_EXPECT_EQ(test, bo->resource->mem_type, fst_mem);
 
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo);
 	ttm_mock_manager_fini(priv->ttm_dev, fst_mem);
 	ttm_mock_manager_fini(priv->ttm_dev, snd_mem);
 }
@@ -807,8 +808,8 @@ static void ttm_bo_validate_happy_evict(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, bos[1].resource->mem_type, mem_type);
 
 	for (i = 0; i < bo_no; i++)
-		ttm_bo_put(&bos[i]);
-	ttm_bo_put(bo_val);
+		ttm_bo_fini(&bos[i]);
+	ttm_bo_fini(bo_val);
 
 	ttm_mock_manager_fini(priv->ttm_dev, mem_type);
 	ttm_mock_manager_fini(priv->ttm_dev, mem_multihop);
@@ -852,12 +853,12 @@ static void ttm_bo_validate_all_pinned_evict(struct kunit *test)
 
 	KUNIT_EXPECT_EQ(test, err, -ENOMEM);
 
-	ttm_bo_put(bo_small);
+	ttm_bo_fini(bo_small);
 
 	ttm_bo_reserve(bo_big, false, false, NULL);
 	ttm_bo_unpin(bo_big);
 	dma_resv_unlock(bo_big->base.resv);
-	ttm_bo_put(bo_big);
+	ttm_bo_fini(bo_big);
 
 	ttm_mock_manager_fini(priv->ttm_dev, mem_type);
 	ttm_mock_manager_fini(priv->ttm_dev, mem_multihop);
@@ -916,13 +917,13 @@ static void ttm_bo_validate_allowed_only_evict(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, bo_evictable->resource->mem_type, mem_type_evict);
 	KUNIT_EXPECT_EQ(test, ctx_val.bytes_moved, size * 2 + BO_SIZE);
 
-	ttm_bo_put(bo);
-	ttm_bo_put(bo_evictable);
+	ttm_bo_fini(bo);
+	ttm_bo_fini(bo_evictable);
 
 	ttm_bo_reserve(bo_pinned, false, false, NULL);
 	ttm_bo_unpin(bo_pinned);
 	dma_resv_unlock(bo_pinned->base.resv);
-	ttm_bo_put(bo_pinned);
+	ttm_bo_fini(bo_pinned);
 
 	ttm_mock_manager_fini(priv->ttm_dev, mem_type);
 	ttm_mock_manager_fini(priv->ttm_dev, mem_multihop);
@@ -973,8 +974,8 @@ static void ttm_bo_validate_deleted_evict(struct kunit *test)
 	KUNIT_EXPECT_NULL(test, bo_big->ttm);
 	KUNIT_EXPECT_NULL(test, bo_big->resource);
 
-	ttm_bo_put(bo_small);
-	ttm_bo_put(bo_big);
+	ttm_bo_fini(bo_small);
+	ttm_bo_fini(bo_big);
 
 	ttm_mock_manager_fini(priv->ttm_dev, mem_type);
 }
@@ -995,7 +996,7 @@ static void ttm_bo_validate_busy_domain_evict(struct kunit *test)
 	 */
 	ttm_device_fini(priv->ttm_dev);
 
-	err = ttm_device_kunit_init_bad_evict(test->priv, priv->ttm_dev, false, false);
+	err = ttm_device_kunit_init_bad_evict(test->priv, priv->ttm_dev);
 	KUNIT_ASSERT_EQ(test, err, 0);
 
 	ttm_mock_manager_init(priv->ttm_dev, mem_type, MANAGER_SIZE);
@@ -1025,8 +1026,8 @@ static void ttm_bo_validate_busy_domain_evict(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, bo_init->resource->mem_type, mem_type);
 	KUNIT_EXPECT_NULL(test, bo_val->resource);
 
-	ttm_bo_put(bo_init);
-	ttm_bo_put(bo_val);
+	ttm_bo_fini(bo_init);
+	ttm_bo_fini(bo_val);
 
 	ttm_mock_manager_fini(priv->ttm_dev, mem_type);
 	ttm_bad_manager_fini(priv->ttm_dev, mem_type_evict);
@@ -1070,8 +1071,8 @@ static void ttm_bo_validate_evict_gutting(struct kunit *test)
 	KUNIT_ASSERT_NULL(test, bo_evict->resource);
 	KUNIT_ASSERT_TRUE(test, bo_evict->ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC);
 
-	ttm_bo_put(bo_evict);
-	ttm_bo_put(bo);
+	ttm_bo_fini(bo_evict);
+	ttm_bo_fini(bo);
 
 	ttm_mock_manager_fini(priv->ttm_dev, mem_type);
 }
@@ -1128,9 +1129,9 @@ static void ttm_bo_validate_recrusive_evict(struct kunit *test)
 	ttm_mock_manager_fini(priv->ttm_dev, mem_type);
 	ttm_mock_manager_fini(priv->ttm_dev, mem_type_evict);
 
-	ttm_bo_put(bo_val);
-	ttm_bo_put(bo_tt);
-	ttm_bo_put(bo_mock);
+	ttm_bo_fini(bo_val);
+	ttm_bo_fini(bo_tt);
+	ttm_bo_fini(bo_mock);
 }
 
 static struct kunit_case ttm_bo_validate_test_cases[] = {
diff --git a/drivers/gpu/drm/ttm/tests/ttm_device_test.c b/drivers/gpu/drm/ttm/tests/ttm_device_test.c
index 1621903818e5..2d55ad34fe48 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_device_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_device_test.c
@@ -7,11 +7,11 @@
 #include <drm/ttm/ttm_placement.h>
 
 #include "ttm_kunit_helpers.h"
+#include "../ttm_pool_internal.h"
 
 struct ttm_device_test_case {
 	const char *description;
-	bool use_dma_alloc;
-	bool use_dma32;
+	unsigned int alloc_flags;
 	bool pools_init_expected;
 };
 
@@ -25,7 +25,7 @@ static void ttm_device_init_basic(struct kunit *test)
 	ttm_dev = kunit_kzalloc(test, sizeof(*ttm_dev), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, ttm_dev);
 
-	err = ttm_device_kunit_init(priv, ttm_dev, false, false);
+	err = ttm_device_kunit_init(priv, ttm_dev, 0);
 	KUNIT_ASSERT_EQ(test, err, 0);
 
 	KUNIT_EXPECT_PTR_EQ(test, ttm_dev->funcs, &ttm_dev_funcs);
@@ -55,7 +55,7 @@ static void ttm_device_init_multiple(struct kunit *test)
 	KUNIT_ASSERT_NOT_NULL(test, ttm_devs);
 
 	for (i = 0; i < num_dev; i++) {
-		err = ttm_device_kunit_init(priv, &ttm_devs[i], false, false);
+		err = ttm_device_kunit_init(priv, &ttm_devs[i], 0);
 		KUNIT_ASSERT_EQ(test, err, 0);
 
 		KUNIT_EXPECT_PTR_EQ(test, ttm_devs[i].dev_mapping,
@@ -81,7 +81,7 @@ static void ttm_device_fini_basic(struct kunit *test)
 	ttm_dev = kunit_kzalloc(test, sizeof(*ttm_dev), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, ttm_dev);
 
-	err = ttm_device_kunit_init(priv, ttm_dev, false, false);
+	err = ttm_device_kunit_init(priv, ttm_dev, 0);
 	KUNIT_ASSERT_EQ(test, err, 0);
 
 	man = ttm_manager_type(ttm_dev, TTM_PL_SYSTEM);
@@ -109,7 +109,7 @@ static void ttm_device_init_no_vma_man(struct kunit *test)
 	vma_man = drm->vma_offset_manager;
 	drm->vma_offset_manager = NULL;
 
-	err = ttm_device_kunit_init(priv, ttm_dev, false, false);
+	err = ttm_device_kunit_init(priv, ttm_dev, 0);
 	KUNIT_EXPECT_EQ(test, err, -EINVAL);
 
 	/* Bring the manager back for a graceful cleanup */
@@ -119,26 +119,22 @@
 static const struct ttm_device_test_case ttm_device_cases[] = {
 	{
 		.description = "No DMA allocations, no DMA32 required",
-		.use_dma_alloc = false,
-		.use_dma32 = false,
 		.pools_init_expected = false,
 	},
 	{
 		.description = "DMA allocations, DMA32 required",
-		.use_dma_alloc = true,
-		.use_dma32 = true,
+		.alloc_flags = TTM_ALLOCATION_POOL_USE_DMA_ALLOC |
+			       TTM_ALLOCATION_POOL_USE_DMA32,
 		.pools_init_expected = true,
 	},
 	{
 		.description = "No DMA allocations, DMA32 required",
-		.use_dma_alloc = false,
-		.use_dma32 = true,
+		.alloc_flags = TTM_ALLOCATION_POOL_USE_DMA32,
 		.pools_init_expected = false,
 	},
 	{
 		.description = "DMA allocations, no DMA32 required",
-		.use_dma_alloc = true,
-		.use_dma32 = false,
+		.alloc_flags = TTM_ALLOCATION_POOL_USE_DMA_ALLOC,
 		.pools_init_expected = true,
 	},
 };
@@ -162,16 +158,13 @@ static void ttm_device_init_pools(struct kunit *test)
 	ttm_dev = kunit_kzalloc(test, sizeof(*ttm_dev), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, ttm_dev);
 
-	err = ttm_device_kunit_init(priv, ttm_dev,
-				    params->use_dma_alloc,
-				    params->use_dma32);
+	err = ttm_device_kunit_init(priv, ttm_dev, params->alloc_flags);
 	KUNIT_ASSERT_EQ(test, err, 0);
 
 	pool = &ttm_dev->pool;
 	KUNIT_ASSERT_NOT_NULL(test, pool);
 	KUNIT_EXPECT_PTR_EQ(test, pool->dev, priv->dev);
-	KUNIT_EXPECT_EQ(test, pool->use_dma_alloc, params->use_dma_alloc);
-	KUNIT_EXPECT_EQ(test, pool->use_dma32, params->use_dma32);
+	KUNIT_EXPECT_EQ(test, pool->alloc_flags, params->alloc_flags);
 
 	if (params->pools_init_expected) {
 		for (int i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
@@ -181,7 +174,7 @@ static void ttm_device_init_pools(struct kunit *test)
 				KUNIT_EXPECT_EQ(test, pt.caching, i);
 				KUNIT_EXPECT_EQ(test, pt.order, j);
 
-				if (params->use_dma_alloc)
+				if (ttm_pool_uses_dma_alloc(pool))
 					KUNIT_ASSERT_FALSE(test,
 							   list_empty(&pt.pages));
 			}
diff --git a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
index 7aaf0d1395ff..7b533e4e1e04 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
@@ -117,8 +117,7 @@ static void bad_evict_flags(struct ttm_buffer_object *bo,
 static int ttm_device_kunit_init_with_funcs(struct ttm_test_devices *priv,
 					    struct ttm_device *ttm,
-					    bool use_dma_alloc,
-					    bool use_dma32,
+					    unsigned int alloc_flags,
 					    struct ttm_device_funcs *funcs)
 {
 	struct drm_device *drm = priv->drm;
@@ -127,7 +126,7 @@ static int ttm_device_kunit_init_with_funcs(struct ttm_test_devices *priv,
 	err = ttm_device_init(ttm, funcs, drm->dev,
 			      drm->anon_inode->i_mapping,
 			      drm->vma_offset_manager,
-			      use_dma_alloc, use_dma32);
+			      alloc_flags);
 
 	return err;
 }
@@ -143,11 +142,10 @@ EXPORT_SYMBOL_GPL(ttm_dev_funcs);
 
 int ttm_device_kunit_init(struct ttm_test_devices *priv,
 			  struct ttm_device *ttm,
-			  bool use_dma_alloc,
-			  bool use_dma32)
+			  unsigned int alloc_flags)
 {
-	return ttm_device_kunit_init_with_funcs(priv, ttm, use_dma_alloc,
-						use_dma32, &ttm_dev_funcs);
+	return ttm_device_kunit_init_with_funcs(priv, ttm, alloc_flags,
+						&ttm_dev_funcs);
 }
 EXPORT_SYMBOL_GPL(ttm_device_kunit_init);
 
@@ -161,12 +159,10 @@ struct ttm_device_funcs ttm_dev_funcs_bad_evict = {
 EXPORT_SYMBOL_GPL(ttm_dev_funcs_bad_evict);
 
 int ttm_device_kunit_init_bad_evict(struct ttm_test_devices *priv,
-				    struct ttm_device *ttm,
-				    bool use_dma_alloc,
-				    bool use_dma32)
+				    struct ttm_device *ttm)
 {
-	return ttm_device_kunit_init_with_funcs(priv, ttm, use_dma_alloc,
-						use_dma32, &ttm_dev_funcs_bad_evict);
+	return ttm_device_kunit_init_with_funcs(priv, ttm, 0,
+						&ttm_dev_funcs_bad_evict);
 }
 EXPORT_SYMBOL_GPL(ttm_device_kunit_init_bad_evict);
 
@@ -252,7 +248,7 @@ struct ttm_test_devices *ttm_test_devices_all(struct kunit *test)
 	ttm_dev = kunit_kzalloc(test, sizeof(*ttm_dev), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, ttm_dev);
 
-	err = ttm_device_kunit_init(devs, ttm_dev, false, false);
+	err = ttm_device_kunit_init(devs, ttm_dev, 0);
 	KUNIT_ASSERT_EQ(test, err, 0);
 
 	devs->ttm_dev = ttm_dev;
diff --git a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h
index c7da23232ffa..f8402b979d05 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h
+++ b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h
@@ -28,12 +28,9 @@ struct ttm_test_devices {
 /* Building blocks for test-specific init functions */
 int ttm_device_kunit_init(struct ttm_test_devices *priv,
 			  struct ttm_device *ttm,
-			  bool use_dma_alloc,
-			  bool use_dma32);
+			  unsigned int alloc_flags);
 int ttm_device_kunit_init_bad_evict(struct ttm_test_devices *priv,
-				    struct ttm_device *ttm,
-				    bool use_dma_alloc,
-				    bool use_dma32);
+				    struct ttm_device *ttm);
 struct ttm_buffer_object *ttm_bo_kunit_init(struct kunit *test,
 					    struct ttm_test_devices *devs,
 					    size_t size,
diff --git a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
index d7eb6471f2ed..dd395229e388 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/export.h>
+#include <linux/module.h>
 
 #include <drm/ttm/ttm_resource.h>
 #include <drm/ttm/ttm_device.h>
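All of the test helpers now take a single flag word instead of the use_dma_alloc/use_dma32 bool pair. For a driver, the equivalent change at ttm_device_init() time would look roughly like the sketch below; the flag names come from this series' <drm/ttm/ttm_allocation.h>, while the surrounding driver structure is hypothetical:

	static int example_ttm_init(struct example_device *edev)
	{
		unsigned int alloc_flags = 0;

		if (edev->needs_coherent_dma)
			alloc_flags |= TTM_ALLOCATION_POOL_USE_DMA_ALLOC;
		if (edev->dma_mask_is_32bit)
			alloc_flags |= TTM_ALLOCATION_POOL_USE_DMA32;

		return ttm_device_init(&edev->bdev, &example_bo_driver,
				       edev->dev, edev->mapping,
				       edev->vma_manager, alloc_flags);
	}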
diff --git a/drivers/gpu/drm/ttm/tests/ttm_pool_test.c b/drivers/gpu/drm/ttm/tests/ttm_pool_test.c
index 8ade53371f72..11c92bd75779 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_pool_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_pool_test.c
@@ -8,11 +8,12 @@
 #include <drm/ttm/ttm_pool.h>
 
 #include "ttm_kunit_helpers.h"
+#include "../ttm_pool_internal.h"
 
 struct ttm_pool_test_case {
 	const char *description;
 	unsigned int order;
-	bool use_dma_alloc;
+	unsigned int alloc_flags;
 };
 
 struct ttm_pool_test_priv {
@@ -86,7 +87,7 @@ static struct ttm_pool *ttm_pool_pre_populated(struct kunit *test,
 	pool = kunit_kzalloc(test, sizeof(*pool), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, pool);
 
-	ttm_pool_init(pool, devs->dev, NUMA_NO_NODE, true, false);
+	ttm_pool_init(pool, devs->dev, NUMA_NO_NODE, TTM_ALLOCATION_POOL_USE_DMA_ALLOC);
 
 	err = ttm_pool_alloc(pool, tt, &simple_ctx);
 	KUNIT_ASSERT_EQ(test, err, 0);
@@ -113,12 +114,12 @@ static const struct ttm_pool_test_case ttm_pool_basic_cases[] = {
 	{
 		.description = "One page, with coherent DMA mappings enabled",
 		.order = 0,
-		.use_dma_alloc = true,
+		.alloc_flags = TTM_ALLOCATION_POOL_USE_DMA_ALLOC,
 	},
 	{
 		.description = "Above the allocation limit, with coherent DMA mappings enabled",
 		.order = MAX_PAGE_ORDER + 1,
-		.use_dma_alloc = true,
+		.alloc_flags = TTM_ALLOCATION_POOL_USE_DMA_ALLOC,
 	},
 };
 
@@ -150,12 +151,11 @@ static void ttm_pool_alloc_basic(struct kunit *test)
 	pool = kunit_kzalloc(test, sizeof(*pool), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, pool);
 
-	ttm_pool_init(pool, devs->dev, NUMA_NO_NODE, params->use_dma_alloc,
-		      false);
+	ttm_pool_init(pool, devs->dev, NUMA_NO_NODE, params->alloc_flags);
 
 	KUNIT_ASSERT_PTR_EQ(test, pool->dev, devs->dev);
 	KUNIT_ASSERT_EQ(test, pool->nid, NUMA_NO_NODE);
-	KUNIT_ASSERT_EQ(test, pool->use_dma_alloc, params->use_dma_alloc);
+	KUNIT_ASSERT_EQ(test, pool->alloc_flags, params->alloc_flags);
 
 	err = ttm_pool_alloc(pool, tt, &simple_ctx);
 	KUNIT_ASSERT_EQ(test, err, 0);
@@ -165,14 +165,14 @@ static void ttm_pool_alloc_basic(struct kunit *test)
 	last_page = tt->pages[tt->num_pages - 1];
 
 	if (params->order <= MAX_PAGE_ORDER) {
-		if (params->use_dma_alloc) {
+		if (ttm_pool_uses_dma_alloc(pool)) {
 			KUNIT_ASSERT_NOT_NULL(test, (void *)fst_page->private);
 			KUNIT_ASSERT_NOT_NULL(test, (void *)last_page->private);
 		} else {
 			KUNIT_ASSERT_EQ(test, fst_page->private, params->order);
 		}
 	} else {
-		if (params->use_dma_alloc) {
+		if (ttm_pool_uses_dma_alloc(pool)) {
 			KUNIT_ASSERT_NOT_NULL(test, (void *)fst_page->private);
 			KUNIT_ASSERT_NULL(test, (void *)last_page->private);
 		} else {
@@ -218,7 +218,7 @@ static void ttm_pool_alloc_basic_dma_addr(struct kunit *test)
 	pool = kunit_kzalloc(test, sizeof(*pool), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, pool);
 
-	ttm_pool_init(pool, devs->dev, NUMA_NO_NODE, true, false);
+	ttm_pool_init(pool, devs->dev, NUMA_NO_NODE, TTM_ALLOCATION_POOL_USE_DMA_ALLOC);
 
 	err = ttm_pool_alloc(pool, tt, &simple_ctx);
 	KUNIT_ASSERT_EQ(test, err, 0);
@@ -348,7 +348,7 @@ static void ttm_pool_free_dma_alloc(struct kunit *test)
 	pool = kunit_kzalloc(test, sizeof(*pool), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, pool);
 
-	ttm_pool_init(pool, devs->dev, NUMA_NO_NODE, true, false);
+	ttm_pool_init(pool, devs->dev, NUMA_NO_NODE, TTM_ALLOCATION_POOL_USE_DMA_ALLOC);
 	ttm_pool_alloc(pool, tt, &simple_ctx);
 
 	pt = &pool->caching[caching].orders[order];
@@ -379,7 +379,7 @@ static void ttm_pool_free_no_dma_alloc(struct kunit *test)
 	pool = kunit_kzalloc(test, sizeof(*pool), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, pool);
 
-	ttm_pool_init(pool, devs->dev, NUMA_NO_NODE, false, false);
+	ttm_pool_init(pool, devs->dev, NUMA_NO_NODE, 0);
 	ttm_pool_alloc(pool, tt, &simple_ctx);
 
 	pt = &pool->caching[caching].orders[order];
diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
index e6ea2bd01f07..c0e4e35e0442 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
@@ -207,6 +207,7 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
 	struct ttm_resource_test_priv *priv = test->priv;
 	struct ttm_resource_manager *man;
 	size_t size = SZ_16K;
+	int i;
 
 	man = kunit_kzalloc(test, sizeof(*man), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, man);
@@ -216,8 +217,8 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
 	KUNIT_ASSERT_PTR_EQ(test, man->bdev, priv->devs->ttm_dev);
 	KUNIT_ASSERT_EQ(test, man->size, size);
 	KUNIT_ASSERT_EQ(test, man->usage, 0);
-	KUNIT_ASSERT_NULL(test, man->move);
-	KUNIT_ASSERT_NOT_NULL(test, &man->move_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
+		KUNIT_ASSERT_NULL(test, man->eviction_fences[i]);
 
 	for (int i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
 		KUNIT_ASSERT_TRUE(test, list_empty(&man->lru[i]));
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 29423ceeec5c..bd27607f8076 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -31,6 +31,8 @@
 
 #define pr_fmt(fmt) "[TTM] " fmt
 
+#include <drm/drm_print.h>
+#include <drm/ttm/ttm_allocation.h>
 #include <drm/ttm/ttm_bo.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_tt.h>
@@ -318,18 +320,17 @@ static void ttm_bo_release(struct kref *kref)
 	bo->destroy(bo);
 }
 
-/**
- * ttm_bo_put
- *
- * @bo: The buffer object.
- *
- * Unreference a buffer object.
- */
+/* TODO: remove! */
 void ttm_bo_put(struct ttm_buffer_object *bo)
 {
 	kref_put(&bo->kref, ttm_bo_release);
 }
-EXPORT_SYMBOL(ttm_bo_put);
+
+void ttm_bo_fini(struct ttm_buffer_object *bo)
+{
+	ttm_bo_put(bo);
+}
+EXPORT_SYMBOL(ttm_bo_fini);
 
 static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo,
 				     struct ttm_operation_ctx *ctx,
@@ -658,34 +659,35 @@
 EXPORT_SYMBOL(ttm_bo_unpin);
 
 /*
- * Add the last move fence to the BO as kernel dependency and reserve a new
- * fence slot.
+ * Add the pipelined eviction fences to the BO as kernel dependency and
+ * reserve new fence slots.
  */
-static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
-				 struct ttm_resource_manager *man,
-				 bool no_wait_gpu)
+static int ttm_bo_add_pipelined_eviction_fences(struct ttm_buffer_object *bo,
+						struct ttm_resource_manager *man,
+						bool no_wait_gpu)
 {
 	struct dma_fence *fence;
-	int ret;
+	int i;
 
-	spin_lock(&man->move_lock);
-	fence = dma_fence_get(man->move);
-	spin_unlock(&man->move_lock);
-
-	if (!fence)
-		return 0;
+	spin_lock(&man->eviction_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+		fence = man->eviction_fences[i];
+		if (!fence)
+			continue;
 
-	if (no_wait_gpu) {
-		ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;
-		dma_fence_put(fence);
-		return ret;
+		if (no_wait_gpu) {
+			if (!dma_fence_is_signaled(fence)) {
+				spin_unlock(&man->eviction_lock);
+				return -EBUSY;
+			}
+		} else {
+			dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
+		}
 	}
+	spin_unlock(&man->eviction_lock);
 
-	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
-
-	ret = dma_resv_reserve_fences(bo->base.resv, 1);
-	dma_fence_put(fence);
-	return ret;
+	/* TODO: this call should be removed. */
+	return dma_resv_reserve_fences(bo->base.resv, 1);
 }
 
 /**
@@ -718,7 +720,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
 	int i, ret;
 
 	ticket = dma_resv_locking_ctx(bo->base.resv);
-	ret = dma_resv_reserve_fences(bo->base.resv, 1);
+	ret = dma_resv_reserve_fences(bo->base.resv, TTM_NUM_MOVE_FENCES);
 	if (unlikely(ret))
 		return ret;
 
@@ -757,7 +759,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
 			return ret;
 		}
 
-		ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu);
+		ret = ttm_bo_add_pipelined_eviction_fences(bo, man, ctx->no_wait_gpu);
 		if (unlikely(ret)) {
 			ttm_resource_free(bo, res);
 			if (ret == -EBUSY)
@@ -878,7 +880,8 @@ bounce:
 
 	/* For backward compatibility with userspace */
 	if (ret == -ENOSPC)
-		return -ENOMEM;
+		return bo->bdev->alloc_flags & TTM_ALLOCATION_PROPAGATE_ENOSPC ?
+			ret : -ENOMEM;
 
 	/*
 	 * We might need to add a TTM.
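Two behavioral points in the ttm_bo.c hunks are worth calling out. First, ttm_bo_alloc_resource() now reserves TTM_NUM_MOVE_FENCES dma-resv slots up front, since validation may add one kernel fence per pipelined-eviction fence context. Second, ttm_bo_validate() keeps translating -ENOSPC into -ENOMEM for userspace compatibility unless the device opted in with TTM_ALLOCATION_PROPAGATE_ENOSPC. A hedged caller-side sketch of what that opt-in buys a driver (the helper names are illustrative):

	static int example_validate(struct ttm_buffer_object *bo,
				    struct ttm_placement *placement,
				    struct ttm_operation_ctx *ctx)
	{
		int ret = ttm_bo_validate(bo, placement, ctx);

		/* Only devices initialized with TTM_ALLOCATION_PROPAGATE_ENOSPC
		 * see -ENOSPC here; it means the placement is full, as opposed
		 * to a genuine allocation failure. */
		if (ret == -ENOSPC)
			return example_retry_in_other_domain(bo, ctx);

		return ret;
	}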
diff --git a/drivers/gpu/drm/ttm/ttm_bo_internal.h b/drivers/gpu/drm/ttm/ttm_bo_internal.h
index 9d8b747a34db..e0d48eac74b0 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_internal.h
+++ b/drivers/gpu/drm/ttm/ttm_bo_internal.h
@@ -55,4 +55,6 @@ ttm_bo_get_unless_zero(struct ttm_buffer_object *bo)
 	return bo;
 }
 
+void ttm_bo_put(struct ttm_buffer_object *bo);
+
 #endif
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index acbbca9d5c92..2ff35d55e462 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -258,7 +258,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	ret = dma_resv_trylock(&fbo->base.base._resv);
 	WARN_ON(!ret);
 
-	ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
+	ret = dma_resv_reserve_fences(&fbo->base.base._resv, TTM_NUM_MOVE_FENCES);
 	if (ret) {
 		dma_resv_unlock(&fbo->base.base._resv);
 		kfree(fbo);
@@ -646,20 +646,44 @@ static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo,
 {
 	struct ttm_device *bdev = bo->bdev;
 	struct ttm_resource_manager *from;
+	struct dma_fence *tmp;
+	int i;
 
 	from = ttm_manager_type(bdev, bo->resource->mem_type);
 
 	/**
 	 * BO doesn't have a TTM we need to bind/unbind. Just remember
-	 * this eviction and free up the allocation
+	 * this eviction and free up the allocation.
+	 * The fence will be saved in the first free slot or in the slot
+	 * already used to store a fence from the same context. Since
+	 * drivers can't use more than TTM_NUM_MOVE_FENCES contexts for
+	 * evictions we should always find a slot to use.
 	 */
 
-	spin_lock(&from->move_lock);
-	if (!from->move || dma_fence_is_later(fence, from->move)) {
-		dma_fence_put(from->move);
-		from->move = dma_fence_get(fence);
+	spin_lock(&from->eviction_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+		tmp = from->eviction_fences[i];
+		if (!tmp)
+			break;
+		if (fence->context != tmp->context)
+			continue;
+		if (dma_fence_is_later(fence, tmp)) {
+			dma_fence_put(tmp);
+			break;
+		}
+		goto unlock;
+	}
+	if (i < TTM_NUM_MOVE_FENCES) {
+		from->eviction_fences[i] = dma_fence_get(fence);
+	} else {
+		WARN(1, "not enough fence slots for all fence contexts");
+		spin_unlock(&from->eviction_lock);
+		dma_fence_wait(fence, false);
+		goto end;
 	}
-	spin_unlock(&from->move_lock);
+unlock:
+	spin_unlock(&from->eviction_lock);
+end:
 
 	ttm_resource_free(bo, &bo->resource);
 }
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index c3e2fcbdd2cc..9a51afaf0749 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -31,6 +31,7 @@
 #include <linux/export.h>
 #include <linux/mm.h>
 
+#include <drm/ttm/ttm_allocation.h>
 #include <drm/ttm/ttm_bo.h>
 #include <drm/ttm/ttm_device.h>
 #include <drm/ttm/ttm_tt.h>
@@ -198,8 +199,7 @@ EXPORT_SYMBOL(ttm_device_swapout);
  * @dev: The core kernel device pointer for DMA mappings and allocations.
  * @mapping: The address space to use for this bo.
  * @vma_manager: A pointer to a vma manager.
- * @use_dma_alloc: If coherent DMA allocation API should be used.
- * @use_dma32: If we should use GFP_DMA32 for device memory allocations.
+ * @alloc_flags: TTM_ALLOCATION_* flags.
  *
 * Initializes a struct ttm_device:
 * Returns:
@@ -208,7 +208,7 @@
 int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *funcs,
 		    struct device *dev, struct address_space *mapping,
 		    struct drm_vma_offset_manager *vma_manager,
-		    bool use_dma_alloc, bool use_dma32)
+		    unsigned int alloc_flags)
 {
 	struct ttm_global *glob = &ttm_glob;
 	int ret, nid;
@@ -227,6 +227,7 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func
 		return -ENOMEM;
 	}
 
+	bdev->alloc_flags = alloc_flags;
 	bdev->funcs = funcs;
 
 	ttm_sys_man_init(bdev);
@@ -236,7 +237,7 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func
 	else
 		nid = NUMA_NO_NODE;
 
-	ttm_pool_init(&bdev->pool, dev, nid, use_dma_alloc, use_dma32);
+	ttm_pool_init(&bdev->pool, dev, nid, alloc_flags);
 
 	bdev->vma_manager = vma_manager;
 	spin_lock_init(&bdev->lru_lock);
diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c
index b3fffe7b5062..aa137ead5cc5 100644
--- a/drivers/gpu/drm/ttm/ttm_module.c
+++ b/drivers/gpu/drm/ttm/ttm_module.c
@@ -74,7 +74,8 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp)
 #endif /* CONFIG_UML */
 #endif /* __i386__ || __x86_64__ */
 #if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \
-	defined(__powerpc__) || defined(__mips__) || defined(__loongarch__)
+	defined(__powerpc__) || defined(__mips__) || defined(__loongarch__) || \
+	defined(__riscv)
 	if (caching == ttm_write_combined)
 		tmp = pgprot_writecombine(tmp);
 	else
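ttm_bo_move_pipeline_evict() above is reached when a driver completes an accelerated move through ttm_bo_move_accel_cleanup() with pipelining enabled; the copy fence is then parked in one of the source manager's eviction_fences[] slots, keyed by fence context. A sketch of the driver side, assuming a hypothetical example_copy_submit() that returns the blit fence from one of the driver's engines:

	static int example_move(struct ttm_buffer_object *bo, bool evict,
				struct ttm_operation_ctx *ctx,
				struct ttm_resource *new_mem)
	{
		struct dma_fence *fence;
		int ret;

		fence = example_copy_submit(bo, new_mem);
		if (IS_ERR(fence))
			return PTR_ERR(fence);

		/* pipeline=true: an eviction does not stall here; the fence
		 * is stored per fence context in eviction_fences[] instead. */
		ret = ttm_bo_move_accel_cleanup(bo, fence, evict, true, new_mem);
		dma_fence_put(fence);
		return ret;
	}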
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index baf27c70a419..18b6db015619 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -48,6 +48,7 @@
 #include <drm/ttm/ttm_bo.h>
 
 #include "ttm_module.h"
+#include "ttm_pool_internal.h"
 
 #ifdef CONFIG_FAULT_INJECTION
 #include <linux/fault-inject.h>
@@ -135,6 +136,7 @@ static DECLARE_RWSEM(pool_shrink_rwsem);
 static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
 					unsigned int order)
 {
+	const unsigned int beneficial_order = ttm_pool_beneficial_order(pool);
 	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
 	struct ttm_pool_dma *dma;
 	struct page *p;
@@ -148,7 +150,14 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
 	gfp_flags |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN |
 		     __GFP_THISNODE;
 
-	if (!pool->use_dma_alloc) {
+	/*
+	 * Do not add latency to the allocation path for allocation orders
+	 * the device told us do not bring additional performance gains.
+	 */
+	if (beneficial_order && order > beneficial_order)
+		gfp_flags &= ~__GFP_DIRECT_RECLAIM;
+
+	if (!ttm_pool_uses_dma_alloc(pool)) {
 		p = alloc_pages_node(pool->nid, gfp_flags, order);
 		if (p)
 			p->private = order;
@@ -200,7 +209,7 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
 		set_pages_wb(p, 1 << order);
 #endif
 
-	if (!pool || !pool->use_dma_alloc) {
+	if (!pool || !ttm_pool_uses_dma_alloc(pool)) {
 		__free_pages(p, order);
 		return;
 	}
@@ -243,7 +252,7 @@ static int ttm_pool_map(struct ttm_pool *pool, unsigned int order,
 {
 	dma_addr_t addr;
 
-	if (pool->use_dma_alloc) {
+	if (ttm_pool_uses_dma_alloc(pool)) {
 		struct ttm_pool_dma *dma = (void *)p->private;
 
 		addr = dma->addr;
@@ -265,7 +274,7 @@ static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr,
 			   unsigned int num_pages)
 {
 	/* Unmapped while freeing the page */
-	if (pool->use_dma_alloc)
+	if (ttm_pool_uses_dma_alloc(pool))
 		return;
 
 	dma_unmap_page(pool->dev, dma_addr, (long)num_pages << PAGE_SHIFT,
@@ -339,7 +348,7 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
 						  enum ttm_caching caching,
 						  unsigned int order)
 {
-	if (pool->use_dma_alloc)
+	if (ttm_pool_uses_dma_alloc(pool))
 		return &pool->caching[caching].orders[order];
 
 #ifdef CONFIG_X86
@@ -348,7 +357,7 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
 		if (pool->nid != NUMA_NO_NODE)
 			return &pool->caching[caching].orders[order];
 
-		if (pool->use_dma32)
+		if (ttm_pool_uses_dma32(pool))
 			return &global_dma32_write_combined[order];
 
 		return &global_write_combined[order];
@@ -356,7 +365,7 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
 	if (pool->nid != NUMA_NO_NODE)
 		return &pool->caching[caching].orders[order];
 
-	if (pool->use_dma32)
+	if (ttm_pool_uses_dma32(pool))
 		return &global_dma32_uncached[order];
 
 	return &global_uncached[order];
@@ -396,7 +405,7 @@ static unsigned int ttm_pool_shrink(void)
 /* Return the allocation order based for a page */
 static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
 {
-	if (pool->use_dma_alloc) {
+	if (ttm_pool_uses_dma_alloc(pool)) {
 		struct ttm_pool_dma *dma = (void *)p->private;
 
 		return dma->vaddr & ~PAGE_MASK;
@@ -719,7 +728,7 @@ static int __ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 	if (ctx->gfp_retry_mayfail)
 		gfp_flags |= __GFP_RETRY_MAYFAIL;
 
-	if (pool->use_dma32)
+	if (ttm_pool_uses_dma32(pool))
 		gfp_flags |= GFP_DMA32;
 	else
 		gfp_flags |= GFP_HIGHUSER;
@@ -977,7 +986,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt,
 		return -EINVAL;
 
 	if ((!ttm_backup_bytes_avail() && !flags->purge) ||
-	    pool->use_dma_alloc || ttm_tt_is_backed_up(tt))
+	    ttm_pool_uses_dma_alloc(pool) || ttm_tt_is_backed_up(tt))
 		return -EBUSY;
 
 #ifdef CONFIG_X86
@@ -1014,7 +1023,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt,
 	if (flags->purge)
 		return shrunken;
 
-	if (pool->use_dma32)
+	if (ttm_pool_uses_dma32(pool))
 		gfp = GFP_DMA32;
 	else
 		gfp = GFP_HIGHUSER;
@@ -1058,22 +1067,20 @@
  * @pool: the pool to initialize
 * @dev: device for DMA allocations and mappings
 * @nid: NUMA node to use for allocations
- * @use_dma_alloc: true if coherent DMA alloc should be used
- * @use_dma32: true if GFP_DMA32 should be used
+ * @alloc_flags: TTM_ALLOCATION_POOL_* flags
 *
 * Initialize the pool and its pool types.
 */
 void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
-		   int nid, bool use_dma_alloc, bool use_dma32)
+		   int nid, unsigned int alloc_flags)
 {
 	unsigned int i, j;
 
-	WARN_ON(!dev && use_dma_alloc);
+	WARN_ON(!dev && ttm_pool_uses_dma_alloc(pool));
 
 	pool->dev = dev;
 	pool->nid = nid;
-	pool->use_dma_alloc = use_dma_alloc;
-	pool->use_dma32 = use_dma32;
+	pool->alloc_flags = alloc_flags;
 
 	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
 		for (j = 0; j < NR_PAGE_ORDERS; ++j) {
@@ -1239,7 +1246,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
 {
 	unsigned int i;
 
-	if (!pool->use_dma_alloc && pool->nid == NUMA_NO_NODE) {
+	if (!ttm_pool_uses_dma_alloc(pool) && pool->nid == NUMA_NO_NODE) {
 		seq_puts(m, "unused\n");
 		return 0;
 	}
@@ -1250,7 +1257,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
 	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
 		if (!ttm_pool_select_type(pool, i, 0))
 			continue;
-		if (pool->use_dma_alloc)
+		if (ttm_pool_uses_dma_alloc(pool))
 			seq_puts(m, "DMA ");
 		else
 			seq_printf(m, "N%d ", pool->nid);
diff --git a/drivers/gpu/drm/ttm/ttm_pool_internal.h b/drivers/gpu/drm/ttm/ttm_pool_internal.h
new file mode 100644
index 000000000000..82c4b7e56a99
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_pool_internal.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright (c) 2025 Valve Corporation */
+
+#ifndef _TTM_POOL_INTERNAL_H_
+#define _TTM_POOL_INTERNAL_H_
+
+#include <drm/ttm/ttm_allocation.h>
+#include <drm/ttm/ttm_pool.h>
+
+static inline bool ttm_pool_uses_dma_alloc(struct ttm_pool *pool)
+{
+	return pool->alloc_flags & TTM_ALLOCATION_POOL_USE_DMA_ALLOC;
+}
+
+static inline bool ttm_pool_uses_dma32(struct ttm_pool *pool)
+{
+	return pool->alloc_flags & TTM_ALLOCATION_POOL_USE_DMA32;
+}
+
+static inline unsigned int ttm_pool_beneficial_order(struct ttm_pool *pool)
+{
+	return pool->alloc_flags & 0xff;
+}
+
+#endif
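The helpers in ttm_pool_internal.h also document how the flag word is packed: the low byte (the 0xff mask) carries the device's "beneficial order" hint consumed by ttm_pool_alloc_page(), which implies the boolean TTM_ALLOCATION_* bits must live above bit 7 (their exact positions are defined in ttm_allocation.h, not shown in this diff). Assuming that layout, a pool that wants 32-bit DMA and tells TTM that orders above 4 buy nothing could be set up like this (values illustrative):

	/* Low byte: orders > 4 gain nothing, so the allocator may skip
	 * direct reclaim for them. Higher bits: behavioral flags. */
	unsigned int alloc_flags = TTM_ALLOCATION_POOL_USE_DMA32 | 4;

	ttm_pool_init(&bdev->pool, dev, NUMA_NO_NODE, alloc_flags);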
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index e2c82ad07eb4..f5aa29dc6ec0 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -34,6 +34,7 @@
 #include <drm/ttm/ttm_resource.h>
 #include <drm/ttm/ttm_tt.h>
 
+#include <drm/drm_print.h>
 #include <drm/drm_util.h>
 
 /* Detach the cursor from the bulk move list*/
@@ -523,14 +524,15 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man,
 {
 	unsigned i;
 
-	spin_lock_init(&man->move_lock);
 	man->bdev = bdev;
 	man->size = size;
 	man->usage = 0;
 
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
 		INIT_LIST_HEAD(&man->lru[i]);
-	man->move = NULL;
+
+	spin_lock_init(&man->eviction_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
+		man->eviction_fences[i] = NULL;
 }
 EXPORT_SYMBOL(ttm_resource_manager_init);
@@ -551,7 +553,7 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 		.no_wait_gpu = false,
 	};
 	struct dma_fence *fence;
-	int ret;
+	int ret, i;
 
 	do {
 		ret = ttm_bo_evict_first(bdev, man, &ctx);
@@ -561,18 +563,24 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 	if (ret && ret != -ENOENT)
 		return ret;
 
-	spin_lock(&man->move_lock);
-	fence = dma_fence_get(man->move);
-	spin_unlock(&man->move_lock);
-
-	if (fence) {
-		ret = dma_fence_wait(fence, false);
-		dma_fence_put(fence);
-		if (ret)
-			return ret;
+	ret = 0;
+
+	spin_lock(&man->eviction_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+		fence = man->eviction_fences[i];
+		if (fence && !dma_fence_is_signaled(fence)) {
+			dma_fence_get(fence);
+			spin_unlock(&man->eviction_lock);
+			ret = dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+			if (ret)
+				return ret;
+			spin_lock(&man->eviction_lock);
+		}
 	}
+	spin_unlock(&man->eviction_lock);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(ttm_resource_manager_evict_all);
@@ -587,6 +595,9 @@ uint64_t ttm_resource_manager_usage(struct ttm_resource_manager *man)
 {
 	uint64_t usage;
 
+	if (WARN_ON_ONCE(!man->bdev))
+		return 0;
+
 	spin_lock(&man->bdev->lru_lock);
 	usage = man->usage;
 	spin_unlock(&man->bdev->lru_lock);
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 506e257dfba8..611d20ab966d 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -40,12 +40,14 @@
 #include <linux/shmem_fs.h>
 
 #include <drm/drm_cache.h>
 #include <drm/drm_device.h>
+#include <drm/drm_print.h>
 #include <drm/drm_util.h>
 #include <drm/ttm/ttm_backup.h>
 #include <drm/ttm/ttm_bo.h>
 #include <drm/ttm/ttm_tt.h>
 
 #include "ttm_module.h"
+#include "ttm_pool_internal.h"
 
 static unsigned long ttm_pages_limit;
@@ -93,7 +95,8 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc)
 	 * mapped TT pages need to be decrypted or otherwise the drivers
 	 * will end up sending encrypted mem to the gpu.
 	 */
-	if (bdev->pool.use_dma_alloc && cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
+	if (ttm_pool_uses_dma_alloc(&bdev->pool) &&
+	    cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
 		page_flags |= TTM_TT_FLAG_DECRYPTED;
 		drm_info_once(ddev, "TT memory decryption enabled.");
 	}
@@ -378,7 +381,7 @@ int ttm_tt_populate(struct ttm_device *bdev,
 
 	if (!(ttm->page_flags & TTM_TT_FLAG_EXTERNAL)) {
 		atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
-		if (bdev->pool.use_dma32)
+		if (ttm_pool_uses_dma32(&bdev->pool))
 			atomic_long_add(ttm->num_pages,
 					&ttm_dma32_pages_allocated);
 	}
@@ -416,7 +419,7 @@ int ttm_tt_populate(struct ttm_device *bdev,
 error:
 	if (!(ttm->page_flags & TTM_TT_FLAG_EXTERNAL)) {
 		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
-		if (bdev->pool.use_dma32)
+		if (ttm_pool_uses_dma32(&bdev->pool))
 			atomic_long_sub(ttm->num_pages,
 					&ttm_dma32_pages_allocated);
 	}
@@ -439,7 +442,7 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
 
 	if (!(ttm->page_flags & TTM_TT_FLAG_EXTERNAL)) {
 		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
-		if (bdev->pool.use_dma32)
+		if (ttm_pool_uses_dma32(&bdev->pool))
 			atomic_long_sub(ttm->num_pages,
 					&ttm_dma32_pages_allocated);
 	}
