Diffstat (limited to 'drivers/gpu/drm/xe')
-rw-r--r--	drivers/gpu/drm/xe/regs/xe_gt_regs.h	|  1
-rw-r--r--	drivers/gpu/drm/xe/tests/xe_pci.c	| 17
-rw-r--r--	drivers/gpu/drm/xe/xe_bo.c		| 34
-rw-r--r--	drivers/gpu/drm/xe/xe_bo_evict.c	|  8
-rw-r--r--	drivers/gpu/drm/xe/xe_configfs.c	| 23
-rw-r--r--	drivers/gpu/drm/xe/xe_device.c		| 21
-rw-r--r--	drivers/gpu/drm/xe/xe_gt_idle.c		|  8
-rw-r--r--	drivers/gpu/drm/xe/xe_guc_submit.c	| 13
-rw-r--r--	drivers/gpu/drm/xe/xe_hw_engine_group.c	|  6
-rw-r--r--	drivers/gpu/drm/xe/xe_late_bind_fw.c	| 20
-rw-r--r--	drivers/gpu/drm/xe/xe_migrate.c		|  6
-rw-r--r--	drivers/gpu/drm/xe/xe_pci.c		|  2
-rw-r--r--	drivers/gpu/drm/xe/xe_pm.c		|  2
-rw-r--r--	drivers/gpu/drm/xe/xe_query.c		| 15
-rw-r--r--	drivers/gpu/drm/xe/xe_svm.c		| 28
-rw-r--r--	drivers/gpu/drm/xe/xe_svm.h		| 14
-rw-r--r--	drivers/gpu/drm/xe/xe_userptr.c		|  1
-rw-r--r--	drivers/gpu/drm/xe/xe_vm.c		| 33
-rw-r--r--	drivers/gpu/drm/xe/xe_vm_types.h	|  1
-rw-r--r--	drivers/gpu/drm/xe/xe_vram.c		| 34
-rw-r--r--	drivers/gpu/drm/xe/xe_vram.h		|  1
21 files changed, 193 insertions, 95 deletions
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 06cb6b02ec64..51f2a03847f9 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -342,6 +342,7 @@
 #define POWERGATE_ENABLE			XE_REG(0xa210)
 #define   RENDER_POWERGATE_ENABLE		REG_BIT(0)
 #define   MEDIA_POWERGATE_ENABLE		REG_BIT(1)
+#define   MEDIA_SAMPLERS_POWERGATE_ENABLE	REG_BIT(2)
 #define   VDN_HCP_POWERGATE_ENABLE(n)		REG_BIT(3 + 2 * (n))
 #define   VDN_MFXVDENC_POWERGATE_ENABLE(n)	REG_BIT(4 + 2 * (n))
 
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index 49b37dfd4e58..663a79ec960d 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -66,6 +66,7 @@ KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc);
 
 /**
  * xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters
+ * @test: test context object
  * @prev: the pointer to the previous parameter to iterate from or NULL
  * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
  *
@@ -211,15 +212,15 @@ static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc)
  * param generator can be used for both
  */
 static const struct xe_ip pre_gmdid_graphics_ips[] = {
-	graphics_ip_xelp,
-	graphics_ip_xelpp,
-	graphics_ip_xehpg,
-	graphics_ip_xehpc,
+	{ 1200, "Xe_LP", &graphics_xelp },
+	{ 1210, "Xe_LP+", &graphics_xelp },
+	{ 1255, "Xe_HPG", &graphics_xehpg },
+	{ 1260, "Xe_HPC", &graphics_xehpc },
 };
 
 static const struct xe_ip pre_gmdid_media_ips[] = {
-	media_ip_xem,
-	media_ip_xehpm,
+	{ 1200, "Xe_M", &media_xem },
+	{ 1255, "Xe_HPM", &media_xem },
 };
 
 KUNIT_ARRAY_PARAM(pre_gmdid_graphics_ip, pre_gmdid_graphics_ips, xe_ip_kunit_desc);
@@ -242,6 +243,7 @@ KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc);
 
 /**
  * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters
+ * @test: test context object
  * @prev: the pointer to the previous parameter to iterate from or NULL
  * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
  *
@@ -266,6 +268,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param);
 
 /**
  * xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters
+ * @test: test context object
  * @prev: the pointer to the previous parameter to iterate from or NULL
  * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
  *
@@ -290,6 +293,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param);
 
 /**
  * xe_pci_id_gen_param - Generate struct pci_device_id parameters
+ * @test: test context object
  * @prev: the pointer to the previous parameter to iterate from or NULL
  * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
  *
@@ -376,6 +380,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init);
 
 /**
  * xe_pci_live_device_gen_param - Helper to iterate Xe devices as KUnit parameters
+ * @test: test context object
  * @prev: the previously returned value, or NULL for the first iteration
  * @desc: the buffer for a parameter name
  *
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 8422f3cab113..4410e28dee54 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1737,6 +1737,24 @@ static bool should_migrate_to_smem(struct xe_bo *bo)
 		bo->attr.atomic_access == DRM_XE_ATOMIC_CPU;
 }
 
+static int xe_bo_wait_usage_kernel(struct xe_bo *bo, struct ttm_operation_ctx *ctx)
+{
+	long lerr;
+
+	if (ctx->no_wait_gpu)
+		return dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL) ?
+			0 : -EBUSY;
+
+	lerr = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+				     ctx->interruptible, MAX_SCHEDULE_TIMEOUT);
+	if (lerr < 0)
+		return lerr;
+	if (lerr == 0)
+		return -EBUSY;
+
+	return 0;
+}
+
 /* Populate the bo if swapped out, or migrate if the access mode requires that. */
 static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx,
 			       struct drm_exec *exec)
@@ -1745,10 +1763,9 @@ static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx,
 	int err = 0;
 
 	if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) {
-		xe_assert(xe_bo_device(bo),
-			  dma_resv_test_signaled(tbo->base.resv, DMA_RESV_USAGE_KERNEL) ||
-			  (tbo->ttm && ttm_tt_is_populated(tbo->ttm)));
-		err = ttm_bo_populate(&bo->ttm, ctx);
+		err = xe_bo_wait_usage_kernel(bo, ctx);
+		if (!err)
+			err = ttm_bo_populate(&bo->ttm, ctx);
 	} else if (should_migrate_to_smem(bo)) {
 		xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM);
 		err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec);
@@ -1922,7 +1939,6 @@ static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
 			.no_wait_gpu = false,
 			.gfp_retry_mayfail = retry_after_wait,
 		};
-		long lerr;
 
 		err = drm_exec_lock_obj(&exec, &tbo->base);
 		drm_exec_retry_on_contention(&exec);
@@ -1942,13 +1958,9 @@ static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
 			break;
 		}
 
-		lerr = dma_resv_wait_timeout(tbo->base.resv,
-					     DMA_RESV_USAGE_KERNEL, true,
-					     MAX_SCHEDULE_TIMEOUT);
-		if (lerr < 0) {
-			err = lerr;
+		err = xe_bo_wait_usage_kernel(bo, &tctx);
+		if (err)
 			break;
-		}
 
 		if (!retry_after_wait)
 			ret = __xe_bo_cpu_fault(vmf, xe, bo);
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index d5dbc51e8612..bc5b4c5fab81 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -182,7 +182,6 @@ int xe_bo_evict_all(struct xe_device *xe)
 
 static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo)
 {
-	struct xe_device *xe = xe_bo_device(bo);
 	int ret;
 
 	ret = xe_bo_restore_pinned(bo);
@@ -201,13 +200,6 @@ static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo)
 		}
 	}
 
-	/*
-	 * We expect validate to trigger a move VRAM and our move code
-	 * should setup the iosys map.
-	 */
-	xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) ||
-		  !iosys_map_is_null(&bo->vmap));
-
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
index 8a9b950e7a6d..139663423185 100644
--- a/drivers/gpu/drm/xe/xe_configfs.c
+++ b/drivers/gpu/drm/xe/xe_configfs.c
@@ -126,8 +126,20 @@
  * not intended for normal execution and will taint the kernel with TAINT_TEST
  * when used.
  *
- * Currently this is implemented only for post and mid context restore.
- * Examples:
+ * The syntax allows to pass straight instructions to be executed by the engine
+ * in a batch buffer or set specific registers.
+ *
+ * #. Generic instruction::
+ *
+ *	<engine-class> cmd <instr> [[dword0] [dword1] [...]]
+ *
+ * #. Simple register setting::
+ *
+ *	<engine-class> reg <address> <value>
+ *
+ * Commands are saved per engine class: all instances of that class will execute
+ * those commands during context switch. The instruction, dword arguments,
+ * addresses and values are in hex format like in the examples below.
  *
  * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f10 after the
  *    normal context restore::
@@ -154,7 +166,8 @@
  *    When using multiple lines, make sure to use a command that is
  *    implemented with a single write syscall, like HEREDOC.
  *
- * These attributes can only be set before binding to the device.
+ * Currently this is implemented only for post and mid context restore and
+ * these attributes can only be set before binding to the device.
  *
  * Remove devices
  * ==============
@@ -324,8 +337,8 @@ static const struct engine_info *lookup_engine_info(const char *pattern, u64 *ma
 			continue;
 
 		pattern += strlen(engine_info[i].cls);
-		if (!mask && !*pattern)
-			return &engine_info[i];
+		if (!mask)
+			return *pattern ? NULL : &engine_info[i];
 
 		if (!strcmp(pattern, "*")) {
 			*mask = engine_info[i].mask;
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index fdb7b7498920..34d33965eac2 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -685,16 +685,16 @@ static int wait_for_lmem_ready(struct xe_device *xe)
 }
 ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */
 
-static void sriov_update_device_info(struct xe_device *xe)
+static void vf_update_device_info(struct xe_device *xe)
 {
+	xe_assert(xe, IS_SRIOV_VF(xe));
 	/* disable features that are not available/applicable to VFs */
-	if (IS_SRIOV_VF(xe)) {
-		xe->info.probe_display = 0;
-		xe->info.has_heci_cscfi = 0;
-		xe->info.has_heci_gscfi = 0;
-		xe->info.skip_guc_pc = 1;
-		xe->info.skip_pcode = 1;
-	}
+	xe->info.probe_display = 0;
+	xe->info.has_heci_cscfi = 0;
+	xe->info.has_heci_gscfi = 0;
+	xe->info.has_late_bind = 0;
+	xe->info.skip_guc_pc = 1;
+	xe->info.skip_pcode = 1;
 }
 
 static int xe_device_vram_alloc(struct xe_device *xe)
@@ -735,7 +735,8 @@ int xe_device_probe_early(struct xe_device *xe)
 
 	xe_sriov_probe_early(xe);
 
-	sriov_update_device_info(xe);
+	if (IS_SRIOV_VF(xe))
+		vf_update_device_info(xe);
 
 	err = xe_pcode_probe_early(xe);
 	if (err || xe_survivability_mode_is_requested(xe)) {
@@ -1069,7 +1070,7 @@ void xe_device_l2_flush(struct xe_device *xe)
 	spin_lock(&gt->global_invl_lock);
 
 	xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
-	if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+	if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 1000, NULL, true))
 		xe_gt_err_once(gt, "Global invalidation timeout\n");
 
 	spin_unlock(&gt->global_invl_lock);
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index f8950a52d0a4..bdc9d9877ec4 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -124,6 +124,9 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
 	if (xe_gt_is_main_type(gt))
 		gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE;
 
+	if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255)
+		gtidle->powergate_enable |= MEDIA_SAMPLERS_POWERGATE_ENABLE;
+
 	if (xe->info.platform != XE_DG1) {
 		for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
 			if ((gt->info.engine_mask & BIT(i)))
@@ -246,6 +249,11 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p)
 			drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n,
 				   str_up_down(pg_status & media_slices[n].status_bit));
 	}
+
+	if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255)
+		drm_printf(p, "Media Samplers Power Gating Enabled: %s\n",
+			   str_yes_no(pg_enabled & MEDIA_SAMPLERS_POWERGATE_ENABLE));
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 53024eb5670b..94ed8159496f 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -44,6 +44,7 @@
 #include "xe_ring_ops_types.h"
 #include "xe_sched_job.h"
 #include "xe_trace.h"
+#include "xe_uc_fw.h"
 #include "xe_vm.h"
 
 static struct xe_guc *
@@ -1489,7 +1490,17 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
 	xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
 	trace_xe_exec_queue_cleanup_entity(q);
 
-	if (exec_queue_registered(q))
+	/*
+	 * Expected state transitions for cleanup:
+	 * - If the exec queue is registered and GuC firmware is running, we must first
+	 *   disable scheduling and deregister the queue to ensure proper teardown and
+	 *   resource release in the GuC, then destroy the exec queue on driver side.
+	 * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
+	 *   we cannot expect a response for the deregister request. In this case,
+	 *   it is safe to directly destroy the exec queue on driver side, as the GuC
+	 *   will not process further requests and all resources must be cleaned up locally.
+	 */
+	if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
 		disable_scheduling_deregister(guc, q);
 	else
 		__guc_exec_queue_destroy(guc, q);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
index 58bee3ffe881..fa4db5f23342 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
@@ -213,17 +213,13 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group
 
 		err = q->ops->suspend_wait(q);
 		if (err)
-			goto err_suspend;
+			return err;
 	}
 
 	if (need_resume)
 		xe_hw_engine_group_resume_faulting_lr_jobs(group);
 
 	return 0;
-
-err_suspend:
-	up_write(&group->mode_sem);
-	return err;
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c
index 38f3feb2aecd..768442ca7da6 100644
--- a/drivers/gpu/drm/xe/xe_late_bind_fw.c
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c
@@ -60,7 +60,7 @@ static int parse_cpd_header(struct xe_late_bind_fw *lb_fw,
 	const struct gsc_manifest_header *manifest;
 	const struct gsc_cpd_entry *entry;
 	size_t min_size = sizeof(*header);
-	u32 offset;
+	u32 offset = 0;
 	int i;
 
 	/* manifest_entry is mandatory */
@@ -116,7 +116,7 @@ static int parse_lb_layout(struct xe_late_bind_fw *lb_fw,
 	const struct csc_fpt_header *header = data;
 	const struct csc_fpt_entry *entry;
 	size_t min_size = sizeof(*header);
-	u32 offset;
+	u32 offset = 0;
 	int i;
 
 	/* fpt_entry is mandatory */
@@ -184,17 +184,13 @@ static const char *xe_late_bind_parse_status(uint32_t status)
 	}
 }
 
-static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind)
+static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind, u32 *num_fans)
 {
 	struct xe_device *xe = late_bind_to_xe(late_bind);
 	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
-	u32 uval;
 
-	if (!xe_pcode_read(root_tile,
-			   PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), &uval, NULL))
-		return uval;
-	else
-		return 0;
+	return xe_pcode_read(root_tile,
+			     PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), num_fans, NULL);
 }
 
 void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind)
@@ -314,7 +310,11 @@ static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id)
 		lb_fw->flags &= ~INTEL_LB_FLAG_IS_PERSISTENT;
 
 	if (lb_fw->type == INTEL_LB_TYPE_FAN_CONTROL) {
-		num_fans = xe_late_bind_fw_num_fans(late_bind);
+		ret = xe_late_bind_fw_num_fans(late_bind, &num_fans);
+		if (ret) {
+			drm_dbg(&xe->drm, "Failed to read number of fans: %d\n", ret);
+			return 0; /* Not a fatal error, continue without fan control */
+		}
 		drm_dbg(&xe->drm, "Number of Fans: %d\n", num_fans);
 		if (!num_fans)
 			return 0;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 1d667fa36cf3..a36ce7dce8cc 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -434,7 +434,7 @@ int xe_migrate_init(struct xe_migrate *m)
 
 	err = xe_migrate_lock_prepare_vm(tile, m, vm);
 	if (err)
-		return err;
+		goto err_out;
 
 	if (xe->info.has_usm) {
 		struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
@@ -2113,7 +2113,9 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
 		if (current_bytes & ~PAGE_MASK) {
 			int pitch = 4;
 
-			current_bytes = min_t(int, current_bytes, S16_MAX * pitch);
+			current_bytes = min_t(int, current_bytes,
+					      round_down(S16_MAX * pitch,
+							 XE_CACHELINE_BYTES));
 		}
 
 		__fence = xe_migrate_vram(m, current_bytes,
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index be91343829dd..9a6df79fc5b6 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -867,6 +867,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		return err;
 
+	xe_vram_resize_bar(xe);
+
 	err = xe_device_probe_early(xe);
 	/*
 	 * In Boot Survivability mode, no drm card is exposed and driver
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index d6625c71115b..2c5a44377994 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -201,7 +201,7 @@ int xe_pm_resume(struct xe_device *xe)
 	if (err)
 		goto err;
 
-	xe_i2c_pm_resume(xe, xe->d3cold.allowed);
+	xe_i2c_pm_resume(xe, true);
 
 	xe_irq_resume(xe);
 
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index e1b603aba61b..2e9ff33ed2fe 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -276,8 +276,7 @@ static int query_mem_regions(struct xe_device *xe,
 	mem_regions->mem_regions[0].instance = 0;
 	mem_regions->mem_regions[0].min_page_size = PAGE_SIZE;
 	mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT;
-	if (perfmon_capable())
-		mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
+	mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
 	mem_regions->num_mem_regions = 1;
 
 	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
@@ -293,13 +292,11 @@ static int query_mem_regions(struct xe_device *xe,
 			mem_regions->mem_regions[mem_regions->num_mem_regions].total_size =
 				man->size;
 
-			if (perfmon_capable()) {
-				xe_ttm_vram_get_used(man,
-					&mem_regions->mem_regions
-					[mem_regions->num_mem_regions].used,
-					&mem_regions->mem_regions
-					[mem_regions->num_mem_regions].cpu_visible_used);
-			}
+			xe_ttm_vram_get_used(man,
+					     &mem_regions->mem_regions
+					     [mem_regions->num_mem_regions].used,
+					     &mem_regions->mem_regions
+					     [mem_regions->num_mem_regions].cpu_visible_used);
 
 			mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size =
 				xe_ttm_vram_get_cpu_visible_size(man);
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 7f2f1f041f1d..da2a412f80c0 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -67,11 +67,6 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
 	range_debug(range, operation);
 }
 
-static void *xe_svm_devm_owner(struct xe_device *xe)
-{
-	return xe;
-}
-
 static struct drm_gpusvm_range *
 xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
 {
@@ -744,15 +739,14 @@ int xe_svm_init(struct xe_vm *vm)
 			  xe_svm_garbage_collector_work_func);
 
 		err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
-				      current->mm, xe_svm_devm_owner(vm->xe), 0,
-				      vm->size,
+				      current->mm, 0, vm->size,
 				      xe_modparam.svm_notifier_size * SZ_1M,
 				      &gpusvm_ops, fault_chunk_sizes,
 				      ARRAY_SIZE(fault_chunk_sizes));
 		drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
 	} else {
 		err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)",
-				      &vm->xe->drm, NULL, NULL, 0, 0, 0, NULL,
+				      &vm->xe->drm, NULL, 0, 0, 0, NULL,
 				      NULL, 0);
 	}
 
@@ -1017,6 +1011,7 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
 		.devmem_only = need_vram && devmem_possible,
 		.timeslice_ms = need_vram && devmem_possible ?
 			vm->xe->atomic_svm_timeslice_ms : 0,
+		.device_private_page_owner = xe_svm_devm_owner(vm->xe),
 	};
 	struct xe_validation_ctx vctx;
 	struct drm_exec exec;
@@ -1039,6 +1034,9 @@ retry:
 	if (err)
 		return err;
 
+	dpagemap = xe_vma_resolve_pagemap(vma, tile);
+	if (!dpagemap && !ctx.devmem_only)
+		ctx.device_private_page_owner = NULL;
 	range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);
 
 	if (IS_ERR(range))
@@ -1059,7 +1057,6 @@ retry:
 
 	range_debug(range, "PAGE FAULT");
 
-	dpagemap = xe_vma_resolve_pagemap(vma, tile);
 	if (--migrate_try_count >= 0 &&
 	    xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) {
 		ktime_t migrate_start = xe_svm_stats_ktime_get();
@@ -1078,7 +1075,17 @@ retry:
 				drm_dbg(&vm->xe->drm,
 					"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
 					vm->usm.asid, ERR_PTR(err));
-				goto retry;
+
+				/*
+				 * In the devmem-only case, mixed mappings may
+				 * be found. The get_pages function will fix
+				 * these up to a single location, allowing the
+				 * page fault handler to make forward progress.
+				 */
+				if (ctx.devmem_only)
+					goto get_pages;
+				else
+					goto retry;
 			} else {
 				drm_err(&vm->xe->drm,
 					"VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
@@ -1088,6 +1095,7 @@ retry:
 		}
 	}
 
+get_pages:
 	get_pages_start = xe_svm_stats_ktime_get();
 
 	range_debug(range, "GET PAGES");
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index cef6ee7d6fe3..0955d2ac8d74 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -6,6 +6,20 @@
 #ifndef _XE_SVM_H_
 #define _XE_SVM_H_
 
+struct xe_device;
+
+/**
+ * xe_svm_devm_owner() - Return the owner of device private memory
+ * @xe: The xe device.
+ *
+ * Return: The owner of this device's device private memory to use in
+ * hmm_range_fault()
+ */
+static inline void *xe_svm_devm_owner(struct xe_device *xe)
+{
+	return xe;
+}
+
 #if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
 
 #include <drm/drm_pagemap.h>
diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
index 91d09af71ced..f16e92cd8090 100644
--- a/drivers/gpu/drm/xe/xe_userptr.c
+++ b/drivers/gpu/drm/xe/xe_userptr.c
@@ -54,6 +54,7 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
 	struct xe_device *xe = vm->xe;
 	struct drm_gpusvm_ctx ctx = {
 		.read_only = xe_vma_read_only(vma),
+		.device_private_page_owner = NULL,
 	};
 
 	lockdep_assert_held(&vm->lock);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 0cacab20ff85..f602b874e054 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2832,7 +2832,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
 }
 
 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
-				 bool validate)
+				 bool res_evict, bool validate)
 {
 	struct xe_bo *bo = xe_vma_bo(vma);
 	struct xe_vm *vm = xe_vma_vm(vma);
@@ -2843,7 +2843,8 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
 			err = drm_exec_lock_obj(exec, &bo->ttm.base);
 		if (!err && validate)
 			err = xe_bo_validate(bo, vm,
-					     !xe_vm_in_preempt_fence_mode(vm), exec);
+					     !xe_vm_in_preempt_fence_mode(vm) &&
+					     res_evict, exec);
 	}
 
 	return err;
@@ -2881,6 +2882,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
 		ctx.read_only = xe_vma_read_only(vma);
 		ctx.devmem_possible = devmem_possible;
 		ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
+		ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe);
 
 		/* TODO: Threading the migration */
 		xa_for_each(&op->prefetch_range.range, i, svm_range) {
@@ -2912,14 +2914,23 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
 }
 
 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
-			    struct xe_vma_op *op)
+			    struct xe_vma_ops *vops, struct xe_vma_op *op)
 {
 	int err = 0;
+	bool res_evict;
+
+	/*
+	 * We only allow evicting a BO within the VM if it is not part of an
+	 * array of binds, as an array of binds can evict another BO within the
+	 * bind.
+	 */
+	res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);
 
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
 		if (!op->map.invalidate_on_bind)
 			err = vma_lock_and_validate(exec, op->map.vma,
+						    res_evict,
 						    !xe_vm_in_fault_mode(vm) ||
 						    op->map.immediate);
 		break;
@@ -2930,11 +2941,13 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 		err = vma_lock_and_validate(exec,
 					    gpuva_to_vma(op->base.remap.unmap->va),
-					    false);
+					    res_evict, false);
 		if (!err && op->remap.prev)
-			err = vma_lock_and_validate(exec, op->remap.prev, true);
+			err = vma_lock_and_validate(exec, op->remap.prev,
+						    res_evict, true);
 		if (!err && op->remap.next)
-			err = vma_lock_and_validate(exec, op->remap.next, true);
+			err = vma_lock_and_validate(exec, op->remap.next,
+						    res_evict, true);
 		break;
 	case DRM_GPUVA_OP_UNMAP:
 		err = check_ufence(gpuva_to_vma(op->base.unmap.va));
@@ -2943,7 +2956,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 
 		err = vma_lock_and_validate(exec,
 					    gpuva_to_vma(op->base.unmap.va),
-					    false);
+					    res_evict, false);
 		break;
 	case DRM_GPUVA_OP_PREFETCH:
 	{
@@ -2958,7 +2971,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 
 		err = vma_lock_and_validate(exec,
 					    gpuva_to_vma(op->base.prefetch.va),
-					    false);
+					    res_evict, false);
 		if (!err && !xe_vma_has_no_bo(vma))
 			err = xe_bo_migrate(xe_vma_bo(vma),
 					    region_to_mem_type[region],
@@ -3004,7 +3017,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
 		return err;
 
 	list_for_each_entry(op, &vops->list, link) {
-		err = op_lock_and_prep(exec, vm, op);
+		err = op_lock_and_prep(exec, vm, vops, op);
 		if (err)
 			return err;
 	}
@@ -3637,6 +3650,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	}
 
 	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
+	if (args->num_binds > 1)
+		vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
 	for (i = 0; i < args->num_binds; ++i) {
 		u64 range = bind_ops[i].range;
 		u64 addr = bind_ops[i].addr;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index da39940501d8..413353e1c225 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -476,6 +476,7 @@ struct xe_vma_ops {
 	/** @flag: signify the properties within xe_vma_ops*/
 #define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0)
 #define XE_VMA_OPS_FLAG_MADVISE          BIT(1)
+#define XE_VMA_OPS_ARRAY_OF_BINDS        BIT(2)
 	u32 flags;
 #ifdef TEST_VM_OPS_ERROR
 	/** @inject_error: inject error to test error handling */
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index b44ebf50fedb..652df7a5f4f6 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -26,15 +26,35 @@
 
 #define BAR_SIZE_SHIFT 20
 
-static void
-_resize_bar(struct xe_device *xe, int resno, resource_size_t size)
+/*
+ * Release all the BARs that could influence/block LMEMBAR resizing, i.e.
+ * assigned IORESOURCE_MEM_64 BARs
+ */
+static void release_bars(struct pci_dev *pdev)
+{
+	struct resource *res;
+	int i;
+
+	pci_dev_for_each_resource(pdev, res, i) {
+		/* Resource already un-assigned, do not reset it */
+		if (!res->parent)
+			continue;
+
+		/* No need to release unrelated BARs */
+		if (!(res->flags & IORESOURCE_MEM_64))
+			continue;
+
+		pci_release_resource(pdev, i);
+	}
+}
+
+static void resize_bar(struct xe_device *xe, int resno, resource_size_t size)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	int bar_size = pci_rebar_bytes_to_size(size);
 	int ret;
 
-	if (pci_resource_len(pdev, resno))
-		pci_release_resource(pdev, resno);
+	release_bars(pdev);
 
 	ret = pci_resize_resource(pdev, resno, bar_size);
 	if (ret) {
@@ -50,7 +70,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
  * if force_vram_bar_size is set, attempt to set to the requested size
  * else set to maximum possible size
  */
-static void resize_vram_bar(struct xe_device *xe)
+void xe_vram_resize_bar(struct xe_device *xe)
 {
 	int force_vram_bar_size = xe_modparam.force_vram_bar_size;
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -119,7 +139,7 @@ static void resize_vram_bar(struct xe_device *xe)
 	pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
 	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
 
-	_resize_bar(xe, LMEM_BAR, rebar_size);
+	resize_bar(xe, LMEM_BAR, rebar_size);
 
 	pci_assign_unassigned_bus_resources(pdev->bus);
 	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
@@ -148,8 +168,6 @@ static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region *
 		return -ENXIO;
 	}
 
-	resize_vram_bar(xe);
-
 	lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR);
 	lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR);
 	if (!lmem_bar->io_size)
diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h
index 72860f714fc6..13505cfb184d 100644
--- a/drivers/gpu/drm/xe/xe_vram.h
+++ b/drivers/gpu/drm/xe/xe_vram.h
@@ -11,6 +11,7 @@
 struct xe_device;
 struct xe_vram_region;
 
+void xe_vram_resize_bar(struct xe_device *xe);
 int xe_vram_probe(struct xe_device *xe);
 
 struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement);
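
For reference, a quick standalone check of the new clamp in xe_migrate_access_memory() above: the per-chunk byte count is now rounded down to a cacheline multiple before use. This is a minimal sketch, not driver code; it assumes XE_CACHELINE_BYTES is 64 and reimplements the kernel's round_down() for power-of-two alignments.

#include <stdio.h>

#define S16_MAX 0x7fff
#define XE_CACHELINE_BYTES 64u			/* assumption for this sketch */
#define round_down(x, y) ((x) & ~((y) - 1))	/* y must be a power of two */

int main(void)
{
	int pitch = 4;

	/* Old clamp: 32767 * 4 = 131068 bytes, not a multiple of 64 */
	printf("unaligned clamp: %d\n", S16_MAX * pitch);

	/* New clamp: round_down(131068, 64) = 131008, cacheline aligned */
	printf("aligned clamp:   %u\n", round_down(S16_MAX * pitch, XE_CACHELINE_BYTES));
	return 0;
}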
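
The xe_query.c change above makes the "used" counters in DRM_XE_DEVICE_QUERY_MEM_REGIONS unconditional rather than gated on perfmon_capable(). A hedged userspace sketch of the standard two-call query pattern follows; the render-node path is an assumption and error handling is minimal.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

int main(void)
{
	struct drm_xe_device_query q = {
		.query = DRM_XE_DEVICE_QUERY_MEM_REGIONS,
	};
	struct drm_xe_query_mem_regions *regions;
	int fd = open("/dev/dri/renderD128", O_RDWR); /* assumed render node */

	if (fd < 0)
		return 1;

	/* First call with size == 0: the kernel reports the buffer size needed */
	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q))
		return 1;

	regions = calloc(1, q.size);
	q.data = (uintptr_t)regions;

	/* Second call fills the region array, including the used counters */
	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q))
		return 1;

	for (uint32_t i = 0; i < regions->num_mem_regions; i++)
		printf("region %u: total %llu, used %llu\n", i,
		       (unsigned long long)regions->mem_regions[i].total_size,
		       (unsigned long long)regions->mem_regions[i].used);
	return 0;
}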