Diffstat (limited to 'drivers/gpu/drm/xe')
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_gt_regs.h     |  1
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_pci.c        | 17
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.c               | 34
-rw-r--r--  drivers/gpu/drm/xe/xe_bo_evict.c         |  8
-rw-r--r--  drivers/gpu/drm/xe/xe_configfs.c         | 23
-rw-r--r--  drivers/gpu/drm/xe/xe_device.c           | 21
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_idle.c          |  8
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c       | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_group.c  |  6
-rw-r--r--  drivers/gpu/drm/xe/xe_late_bind_fw.c     | 20
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.c          |  6
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.c              |  2
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.c               |  2
-rw-r--r--  drivers/gpu/drm/xe/xe_query.c            | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_svm.c              | 28
-rw-r--r--  drivers/gpu/drm/xe/xe_svm.h              | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_userptr.c          |  1
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.c               | 33
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_types.h         |  1
-rw-r--r--  drivers/gpu/drm/xe/xe_vram.c             | 34
-rw-r--r--  drivers/gpu/drm/xe/xe_vram.h             |  1
21 files changed, 193 insertions(+), 95 deletions(-)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 06cb6b02ec64..51f2a03847f9 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -342,6 +342,7 @@
#define POWERGATE_ENABLE XE_REG(0xa210)
#define RENDER_POWERGATE_ENABLE REG_BIT(0)
#define MEDIA_POWERGATE_ENABLE REG_BIT(1)
+#define MEDIA_SAMPLERS_POWERGATE_ENABLE REG_BIT(2)
#define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n))
#define VDN_MFXVDENC_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n))
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index 49b37dfd4e58..663a79ec960d 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -66,6 +66,7 @@ KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc);
/**
* xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters
+ * @test: test context object
* @prev: the pointer to the previous parameter to iterate from or NULL
* @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
*
@@ -211,15 +212,15 @@ static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc)
* param generator can be used for both
*/
static const struct xe_ip pre_gmdid_graphics_ips[] = {
- graphics_ip_xelp,
- graphics_ip_xelpp,
- graphics_ip_xehpg,
- graphics_ip_xehpc,
+ { 1200, "Xe_LP", &graphics_xelp },
+ { 1210, "Xe_LP+", &graphics_xelp },
+ { 1255, "Xe_HPG", &graphics_xehpg },
+ { 1260, "Xe_HPC", &graphics_xehpc },
};
static const struct xe_ip pre_gmdid_media_ips[] = {
- media_ip_xem,
- media_ip_xehpm,
+ { 1200, "Xe_M", &media_xem },
+ { 1255, "Xe_HPM", &media_xem },
};
KUNIT_ARRAY_PARAM(pre_gmdid_graphics_ip, pre_gmdid_graphics_ips, xe_ip_kunit_desc);
@@ -242,6 +243,7 @@ KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc);
/**
* xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters
+ * @test: test context object
* @prev: the pointer to the previous parameter to iterate from or NULL
* @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
*
@@ -266,6 +268,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param);
/**
* xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters
+ * @test: test context object
* @prev: the pointer to the previous parameter to iterate from or NULL
* @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
*
@@ -290,6 +293,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param);
/**
* xe_pci_id_gen_param - Generate struct pci_device_id parameters
+ * @test: test context object
* @prev: the pointer to the previous parameter to iterate from or NULL
* @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
*
@@ -376,6 +380,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init);
/**
* xe_pci_live_device_gen_param - Helper to iterate Xe devices as KUnit parameters
+ * @test: test context object
* @prev: the previously returned value, or NULL for the first iteration
* @desc: the buffer for a parameter name
*
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 8422f3cab113..4410e28dee54 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1737,6 +1737,24 @@ static bool should_migrate_to_smem(struct xe_bo *bo)
bo->attr.atomic_access == DRM_XE_ATOMIC_CPU;
}
+static int xe_bo_wait_usage_kernel(struct xe_bo *bo, struct ttm_operation_ctx *ctx)
+{
+ long lerr;
+
+ if (ctx->no_wait_gpu)
+ return dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL) ?
+ 0 : -EBUSY;
+
+ lerr = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+ ctx->interruptible, MAX_SCHEDULE_TIMEOUT);
+ if (lerr < 0)
+ return lerr;
+ if (lerr == 0)
+ return -EBUSY;
+
+ return 0;
+}
+
/* Populate the bo if swapped out, or migrate if the access mode requires that. */
static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx,
struct drm_exec *exec)
@@ -1745,10 +1763,9 @@ static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx,
int err = 0;
if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) {
- xe_assert(xe_bo_device(bo),
- dma_resv_test_signaled(tbo->base.resv, DMA_RESV_USAGE_KERNEL) ||
- (tbo->ttm && ttm_tt_is_populated(tbo->ttm)));
- err = ttm_bo_populate(&bo->ttm, ctx);
+ err = xe_bo_wait_usage_kernel(bo, ctx);
+ if (!err)
+ err = ttm_bo_populate(&bo->ttm, ctx);
} else if (should_migrate_to_smem(bo)) {
xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM);
err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec);
@@ -1922,7 +1939,6 @@ static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
.no_wait_gpu = false,
.gfp_retry_mayfail = retry_after_wait,
};
- long lerr;
err = drm_exec_lock_obj(&exec, &tbo->base);
drm_exec_retry_on_contention(&exec);
@@ -1942,13 +1958,9 @@ static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
break;
}
- lerr = dma_resv_wait_timeout(tbo->base.resv,
- DMA_RESV_USAGE_KERNEL, true,
- MAX_SCHEDULE_TIMEOUT);
- if (lerr < 0) {
- err = lerr;
+ err = xe_bo_wait_usage_kernel(bo, &tctx);
+ if (err)
break;
- }
if (!retry_after_wait)
ret = __xe_bo_cpu_fault(vmf, xe, bo);
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index d5dbc51e8612..bc5b4c5fab81 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -182,7 +182,6 @@ int xe_bo_evict_all(struct xe_device *xe)
static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo)
{
- struct xe_device *xe = xe_bo_device(bo);
int ret;
ret = xe_bo_restore_pinned(bo);
@@ -201,13 +200,6 @@ static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo)
}
}
- /*
- * We expect validate to trigger a move VRAM and our move code
- * should setup the iosys map.
- */
- xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) ||
- !iosys_map_is_null(&bo->vmap));
-
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
index 8a9b950e7a6d..139663423185 100644
--- a/drivers/gpu/drm/xe/xe_configfs.c
+++ b/drivers/gpu/drm/xe/xe_configfs.c
@@ -126,8 +126,20 @@
* not intended for normal execution and will taint the kernel with TAINT_TEST
* when used.
*
- * Currently this is implemented only for post and mid context restore.
- * Examples:
+ * The syntax allows passing instructions to be executed directly by the
+ * engine in a batch buffer, or setting specific registers.
+ *
+ * #. Generic instruction::
+ *
+ * <engine-class> cmd <instr> [[dword0] [dword1] [...]]
+ *
+ * #. Simple register setting::
+ *
+ * <engine-class> reg <address> <value>
+ *
+ * Commands are saved per engine class: all instances of that class will execute
+ * those commands during a context switch. The instruction, dword arguments,
+ * addresses and values are in hex format, as in the examples below.
*
* #. Execute a LRI command to write 0xDEADBEEF to register 0x4f10 after the
* normal context restore::
@@ -154,7 +166,8 @@
* When using multiple lines, make sure to use a command that is
* implemented with a single write syscall, like HEREDOC.
*
- * These attributes can only be set before binding to the device.
+ * Currently this is implemented only for post and mid context restore and
+ * these attributes can only be set before binding to the device.
*
* Remove devices
* ==============
@@ -324,8 +337,8 @@ static const struct engine_info *lookup_engine_info(const char *pattern, u64 *ma
continue;
pattern += strlen(engine_info[i].cls);
- if (!mask && !*pattern)
- return &engine_info[i];
+ if (!mask)
+ return *pattern ? NULL : &engine_info[i];
if (!strcmp(pattern, "*")) {
*mask = engine_info[i].mask;
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index fdb7b7498920..34d33965eac2 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -685,16 +685,16 @@ static int wait_for_lmem_ready(struct xe_device *xe)
}
ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */
-static void sriov_update_device_info(struct xe_device *xe)
+static void vf_update_device_info(struct xe_device *xe)
{
+ xe_assert(xe, IS_SRIOV_VF(xe));
/* disable features that are not available/applicable to VFs */
- if (IS_SRIOV_VF(xe)) {
- xe->info.probe_display = 0;
- xe->info.has_heci_cscfi = 0;
- xe->info.has_heci_gscfi = 0;
- xe->info.skip_guc_pc = 1;
- xe->info.skip_pcode = 1;
- }
+ xe->info.probe_display = 0;
+ xe->info.has_heci_cscfi = 0;
+ xe->info.has_heci_gscfi = 0;
+ xe->info.has_late_bind = 0;
+ xe->info.skip_guc_pc = 1;
+ xe->info.skip_pcode = 1;
}
static int xe_device_vram_alloc(struct xe_device *xe)
@@ -735,7 +735,8 @@ int xe_device_probe_early(struct xe_device *xe)
xe_sriov_probe_early(xe);
- sriov_update_device_info(xe);
+ if (IS_SRIOV_VF(xe))
+ vf_update_device_info(xe);
err = xe_pcode_probe_early(xe);
if (err || xe_survivability_mode_is_requested(xe)) {
@@ -1069,7 +1070,7 @@ void xe_device_l2_flush(struct xe_device *xe)
spin_lock(&gt->global_invl_lock);
xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
- if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+ if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 1000, NULL, true))
xe_gt_err_once(gt, "Global invalidation timeout\n");
spin_unlock(&gt->global_invl_lock);
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index f8950a52d0a4..bdc9d9877ec4 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -124,6 +124,9 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
if (xe_gt_is_main_type(gt))
gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE;
+ if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255)
+ gtidle->powergate_enable |= MEDIA_SAMPLERS_POWERGATE_ENABLE;
+
if (xe->info.platform != XE_DG1) {
for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
if ((gt->info.engine_mask & BIT(i)))
@@ -246,6 +249,11 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p)
drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n,
str_up_down(pg_status & media_slices[n].status_bit));
}
+
+ if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255)
+ drm_printf(p, "Media Samplers Power Gating Enabled: %s\n",
+ str_yes_no(pg_enabled & MEDIA_SAMPLERS_POWERGATE_ENABLE));
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 53024eb5670b..94ed8159496f 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -44,6 +44,7 @@
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
+#include "xe_uc_fw.h"
#include "xe_vm.h"
static struct xe_guc *
@@ -1489,7 +1490,17 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
trace_xe_exec_queue_cleanup_entity(q);
- if (exec_queue_registered(q))
+ /*
+ * Expected state transitions for cleanup:
+ * - If the exec queue is registered and GuC firmware is running, we must first
+ * disable scheduling and deregister the queue to ensure proper teardown and
+ * resource release in the GuC, then destroy the exec queue on driver side.
+ * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
+ * we cannot expect a response for the deregister request. In this case,
+ * it is safe to directly destroy the exec queue on driver side, as the GuC
+ * will not process further requests and all resources must be cleaned up locally.
+ */
+ if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
disable_scheduling_deregister(guc, q);
else
__guc_exec_queue_destroy(guc, q);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
index 58bee3ffe881..fa4db5f23342 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
@@ -213,17 +213,13 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group
err = q->ops->suspend_wait(q);
if (err)
- goto err_suspend;
+ return err;
}
if (need_resume)
xe_hw_engine_group_resume_faulting_lr_jobs(group);
return 0;
-
-err_suspend:
- up_write(&group->mode_sem);
- return err;
}
/**
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c
index 38f3feb2aecd..768442ca7da6 100644
--- a/drivers/gpu/drm/xe/xe_late_bind_fw.c
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c
@@ -60,7 +60,7 @@ static int parse_cpd_header(struct xe_late_bind_fw *lb_fw,
const struct gsc_manifest_header *manifest;
const struct gsc_cpd_entry *entry;
size_t min_size = sizeof(*header);
- u32 offset;
+ u32 offset = 0;
int i;
/* manifest_entry is mandatory */
@@ -116,7 +116,7 @@ static int parse_lb_layout(struct xe_late_bind_fw *lb_fw,
const struct csc_fpt_header *header = data;
const struct csc_fpt_entry *entry;
size_t min_size = sizeof(*header);
- u32 offset;
+ u32 offset = 0;
int i;
/* fpt_entry is mandatory */
@@ -184,17 +184,13 @@ static const char *xe_late_bind_parse_status(uint32_t status)
}
}
-static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind)
+static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind, u32 *num_fans)
{
struct xe_device *xe = late_bind_to_xe(late_bind);
struct xe_tile *root_tile = xe_device_get_root_tile(xe);
- u32 uval;
- if (!xe_pcode_read(root_tile,
- PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), &uval, NULL))
- return uval;
- else
- return 0;
+ return xe_pcode_read(root_tile,
+ PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), num_fans, NULL);
}
void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind)
@@ -314,7 +310,11 @@ static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id)
lb_fw->flags &= ~INTEL_LB_FLAG_IS_PERSISTENT;
if (lb_fw->type == INTEL_LB_TYPE_FAN_CONTROL) {
- num_fans = xe_late_bind_fw_num_fans(late_bind);
+ ret = xe_late_bind_fw_num_fans(late_bind, &num_fans);
+ if (ret) {
+ drm_dbg(&xe->drm, "Failed to read number of fans: %d\n", ret);
+ return 0; /* Not a fatal error, continue without fan control */
+ }
drm_dbg(&xe->drm, "Number of Fans: %d\n", num_fans);
if (!num_fans)
return 0;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 1d667fa36cf3..a36ce7dce8cc 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -434,7 +434,7 @@ int xe_migrate_init(struct xe_migrate *m)
err = xe_migrate_lock_prepare_vm(tile, m, vm);
if (err)
- return err;
+ goto err_out;
if (xe->info.has_usm) {
struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
@@ -2113,7 +2113,9 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
if (current_bytes & ~PAGE_MASK) {
int pitch = 4;
- current_bytes = min_t(int, current_bytes, S16_MAX * pitch);
+ current_bytes = min_t(int, current_bytes,
+ round_down(S16_MAX * pitch,
+ XE_CACHELINE_BYTES));
}
__fence = xe_migrate_vram(m, current_bytes,
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index be91343829dd..9a6df79fc5b6 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -867,6 +867,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
return err;
+ xe_vram_resize_bar(xe);
+
err = xe_device_probe_early(xe);
/*
* In Boot Survivability mode, no drm card is exposed and driver
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index d6625c71115b..2c5a44377994 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -201,7 +201,7 @@ int xe_pm_resume(struct xe_device *xe)
if (err)
goto err;
- xe_i2c_pm_resume(xe, xe->d3cold.allowed);
+ xe_i2c_pm_resume(xe, true);
xe_irq_resume(xe);
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index e1b603aba61b..2e9ff33ed2fe 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -276,8 +276,7 @@ static int query_mem_regions(struct xe_device *xe,
mem_regions->mem_regions[0].instance = 0;
mem_regions->mem_regions[0].min_page_size = PAGE_SIZE;
mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT;
- if (perfmon_capable())
- mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
+ mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
mem_regions->num_mem_regions = 1;
for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
@@ -293,13 +292,11 @@ static int query_mem_regions(struct xe_device *xe,
mem_regions->mem_regions[mem_regions->num_mem_regions].total_size =
man->size;
- if (perfmon_capable()) {
- xe_ttm_vram_get_used(man,
- &mem_regions->mem_regions
- [mem_regions->num_mem_regions].used,
- &mem_regions->mem_regions
- [mem_regions->num_mem_regions].cpu_visible_used);
- }
+ xe_ttm_vram_get_used(man,
+ &mem_regions->mem_regions
+ [mem_regions->num_mem_regions].used,
+ &mem_regions->mem_regions
+ [mem_regions->num_mem_regions].cpu_visible_used);
mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size =
xe_ttm_vram_get_cpu_visible_size(man);
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 7f2f1f041f1d..da2a412f80c0 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -67,11 +67,6 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
range_debug(range, operation);
}
-static void *xe_svm_devm_owner(struct xe_device *xe)
-{
- return xe;
-}
-
static struct drm_gpusvm_range *
xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
{
@@ -744,15 +739,14 @@ int xe_svm_init(struct xe_vm *vm)
xe_svm_garbage_collector_work_func);
err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
- current->mm, xe_svm_devm_owner(vm->xe), 0,
- vm->size,
+ current->mm, 0, vm->size,
xe_modparam.svm_notifier_size * SZ_1M,
&gpusvm_ops, fault_chunk_sizes,
ARRAY_SIZE(fault_chunk_sizes));
drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
} else {
err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)",
- &vm->xe->drm, NULL, NULL, 0, 0, 0, NULL,
+ &vm->xe->drm, NULL, 0, 0, 0, NULL,
NULL, 0);
}
@@ -1017,6 +1011,7 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
.devmem_only = need_vram && devmem_possible,
.timeslice_ms = need_vram && devmem_possible ?
vm->xe->atomic_svm_timeslice_ms : 0,
+ .device_private_page_owner = xe_svm_devm_owner(vm->xe),
};
struct xe_validation_ctx vctx;
struct drm_exec exec;
@@ -1039,6 +1034,9 @@ retry:
if (err)
return err;
+ dpagemap = xe_vma_resolve_pagemap(vma, tile);
+ if (!dpagemap && !ctx.devmem_only)
+ ctx.device_private_page_owner = NULL;
range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);
if (IS_ERR(range))
@@ -1059,7 +1057,6 @@ retry:
range_debug(range, "PAGE FAULT");
- dpagemap = xe_vma_resolve_pagemap(vma, tile);
if (--migrate_try_count >= 0 &&
xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) {
ktime_t migrate_start = xe_svm_stats_ktime_get();
@@ -1078,7 +1075,17 @@ retry:
drm_dbg(&vm->xe->drm,
"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
vm->usm.asid, ERR_PTR(err));
- goto retry;
+
+ /*
+ * In the devmem-only case, mixed mappings may
+ * be found. The get_pages function will fix
+ * these up to a single location, allowing the
+ * page fault handler to make forward progress.
+ */
+ if (ctx.devmem_only)
+ goto get_pages;
+ else
+ goto retry;
} else {
drm_err(&vm->xe->drm,
"VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
@@ -1088,6 +1095,7 @@ retry:
}
}
+get_pages:
get_pages_start = xe_svm_stats_ktime_get();
range_debug(range, "GET PAGES");
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index cef6ee7d6fe3..0955d2ac8d74 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -6,6 +6,20 @@
#ifndef _XE_SVM_H_
#define _XE_SVM_H_
+struct xe_device;
+
+/**
+ * xe_svm_devm_owner() - Return the owner of device private memory
+ * @xe: The xe device.
+ *
+ * Return: The owner of this device's device private memory to use in
+ * hmm_range_fault().
+ */
+static inline void *xe_svm_devm_owner(struct xe_device *xe)
+{
+ return xe;
+}
+
#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
#include <drm/drm_pagemap.h>
diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
index 91d09af71ced..f16e92cd8090 100644
--- a/drivers/gpu/drm/xe/xe_userptr.c
+++ b/drivers/gpu/drm/xe/xe_userptr.c
@@ -54,6 +54,7 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
struct xe_device *xe = vm->xe;
struct drm_gpusvm_ctx ctx = {
.read_only = xe_vma_read_only(vma),
+ .device_private_page_owner = NULL,
};
lockdep_assert_held(&vm->lock);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 0cacab20ff85..f602b874e054 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2832,7 +2832,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
}
static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
- bool validate)
+ bool res_evict, bool validate)
{
struct xe_bo *bo = xe_vma_bo(vma);
struct xe_vm *vm = xe_vma_vm(vma);
@@ -2843,7 +2843,8 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
err = drm_exec_lock_obj(exec, &bo->ttm.base);
if (!err && validate)
err = xe_bo_validate(bo, vm,
- !xe_vm_in_preempt_fence_mode(vm), exec);
+ !xe_vm_in_preempt_fence_mode(vm) &&
+ res_evict, exec);
}
return err;
@@ -2881,6 +2882,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
ctx.read_only = xe_vma_read_only(vma);
ctx.devmem_possible = devmem_possible;
ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
+ ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe);
/* TODO: Threading the migration */
xa_for_each(&op->prefetch_range.range, i, svm_range) {
@@ -2912,14 +2914,23 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
}
static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
- struct xe_vma_op *op)
+ struct xe_vma_ops *vops, struct xe_vma_op *op)
{
int err = 0;
+ bool res_evict;
+
+ /*
+ * We only allow evicting a BO within the VM if it is not part of an
+ * array of binds, as an array of binds can evict another BO within the
+ * bind.
+ */
+ res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
if (!op->map.invalidate_on_bind)
err = vma_lock_and_validate(exec, op->map.vma,
+ res_evict,
!xe_vm_in_fault_mode(vm) ||
op->map.immediate);
break;
@@ -2930,11 +2941,13 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.remap.unmap->va),
- false);
+ res_evict, false);
if (!err && op->remap.prev)
- err = vma_lock_and_validate(exec, op->remap.prev, true);
+ err = vma_lock_and_validate(exec, op->remap.prev,
+ res_evict, true);
if (!err && op->remap.next)
- err = vma_lock_and_validate(exec, op->remap.next, true);
+ err = vma_lock_and_validate(exec, op->remap.next,
+ res_evict, true);
break;
case DRM_GPUVA_OP_UNMAP:
err = check_ufence(gpuva_to_vma(op->base.unmap.va));
@@ -2943,7 +2956,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.unmap.va),
- false);
+ res_evict, false);
break;
case DRM_GPUVA_OP_PREFETCH:
{
@@ -2958,7 +2971,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.prefetch.va),
- false);
+ res_evict, false);
if (!err && !xe_vma_has_no_bo(vma))
err = xe_bo_migrate(xe_vma_bo(vma),
region_to_mem_type[region],
@@ -3004,7 +3017,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
return err;
list_for_each_entry(op, &vops->list, link) {
- err = op_lock_and_prep(exec, vm, op);
+ err = op_lock_and_prep(exec, vm, vops, op);
if (err)
return err;
}
@@ -3637,6 +3650,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
}
xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
+ if (args->num_binds > 1)
+ vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
for (i = 0; i < args->num_binds; ++i) {
u64 range = bind_ops[i].range;
u64 addr = bind_ops[i].addr;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index da39940501d8..413353e1c225 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -476,6 +476,7 @@ struct xe_vma_ops {
/** @flag: signify the properties within xe_vma_ops*/
#define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0)
#define XE_VMA_OPS_FLAG_MADVISE BIT(1)
+#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2)
u32 flags;
#ifdef TEST_VM_OPS_ERROR
/** @inject_error: inject error to test error handling */
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index b44ebf50fedb..652df7a5f4f6 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -26,15 +26,35 @@
#define BAR_SIZE_SHIFT 20
-static void
-_resize_bar(struct xe_device *xe, int resno, resource_size_t size)
+/*
+ * Release all the BARs that could influence/block LMEMBAR resizing, i.e.
+ * assigned IORESOURCE_MEM_64 BARs
+ */
+static void release_bars(struct pci_dev *pdev)
+{
+ struct resource *res;
+ int i;
+
+ pci_dev_for_each_resource(pdev, res, i) {
+ /* Resource already un-assigned, do not reset it */
+ if (!res->parent)
+ continue;
+
+ /* No need to release unrelated BARs */
+ if (!(res->flags & IORESOURCE_MEM_64))
+ continue;
+
+ pci_release_resource(pdev, i);
+ }
+}
+
+static void resize_bar(struct xe_device *xe, int resno, resource_size_t size)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
int bar_size = pci_rebar_bytes_to_size(size);
int ret;
- if (pci_resource_len(pdev, resno))
- pci_release_resource(pdev, resno);
+ release_bars(pdev);
ret = pci_resize_resource(pdev, resno, bar_size);
if (ret) {
@@ -50,7 +70,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
* if force_vram_bar_size is set, attempt to set to the requested size
* else set to maximum possible size
*/
-static void resize_vram_bar(struct xe_device *xe)
+void xe_vram_resize_bar(struct xe_device *xe)
{
int force_vram_bar_size = xe_modparam.force_vram_bar_size;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -119,7 +139,7 @@ static void resize_vram_bar(struct xe_device *xe)
pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
- _resize_bar(xe, LMEM_BAR, rebar_size);
+ resize_bar(xe, LMEM_BAR, rebar_size);
pci_assign_unassigned_bus_resources(pdev->bus);
pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
@@ -148,8 +168,6 @@ static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region *
return -ENXIO;
}
- resize_vram_bar(xe);
-
lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR);
lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR);
if (!lmem_bar->io_size)
diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h
index 72860f714fc6..13505cfb184d 100644
--- a/drivers/gpu/drm/xe/xe_vram.h
+++ b/drivers/gpu/drm/xe/xe_vram.h
@@ -11,6 +11,7 @@
struct xe_device;
struct xe_vram_region;
+void xe_vram_resize_bar(struct xe_device *xe);
int xe_vram_probe(struct xe_device *xe);
struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement);