summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-04 08:53:30 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-04 08:53:30 -0800
commit6dfafbd0299a60bfb5d5e277fdf100037c7ded07 (patch)
treeaccb0e721120091f05107277744187d09e86fe5a /drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
parent94e244d9ccab578f83a218ec58376d025014fcce (diff)
parent0692602defb0c273f80dec9c564ca50726404aca (diff)
Merge tag 'drm-next-2025-12-03' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie: "There was a rather late merge of a new color pipeline feature, that some userspace projects are blocked on, and has seen a lot of work in amdgpu. This should have seen some time in -next. There is additional support for this for Intel, that if it arrives in the next day or two I'll pass it on in another pull request and you can decide if you want to take it. Highlights: - Arm Ethos NPU accelerator driver - new DRM color pipeline support - amdgpu will now run discrete SI/CIK cards instead of radeon, which enables vulkan support in userspace - msm gets gen8 gpu support - initial Xe3P support in xe Full detail summary: New driver: - Arm Ethos-U65/U85 accel driver Core: - support the drm color pipeline in vkms/amdgfx - add support for drm colorop pipeline - add COLOR PIPELINE plane property - add DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE - throttle dirty worker with vblank - use drm_for_each_bridge_in_chain_scoped in drm's bridge code - Ensure drm_client_modeset tests are enabled in UML - add simulated vblank interrupt - use in drivers - dumb buffer sizing helper - move freeing of drm client memory to driver - crtc sharpness strength property - stop using system_wq in scheduler/drivers - support emergency restore in drm-client Rust: - make slice::as_flattened usable on all supported rustc - add FromBytes::from_bytes_prefix() method - remove redundant device ptr from Rust GEM object - Change how AlwaysRefCounted is implemented for GEM objects gpuvm: - Add deferred vm_bo cleanup to GPUVM (for rust) atomic: - cleanup and improve state handling interfaces buddy: - optimize block management dma-buf: - heaps: Create heap per CMA reserved location - improve userspace documentation dp: - add POST_LT_ADJ_REQ training sequence - DPCD dSC quirk for synaptics panamera devices - helpers to query branch DSC max throughput ttm: - Rename ttm_bo_put to ttm_bo_fini - allow page protection flags on risc-v - rework pipelined eviction fence handling amdgpu: - enable amdgpu by default for SI/CI dGPUs - enable DC by default on SI - refactor CIK/SI enablement - add ABM KMS property - Re-enable DM idle optimizations - DC Analog encoders support - Powerplay fixes for fiji/iceland - Enable DC on bonaire by default - HMM cleanup - Add new RAS framework - DML2.1 updates - YCbCr420 fixes - DC FP fixes - DMUB fixes - LTTPR fixes - DTBCLK fixes - DMU cursor offload handling - Userq validation improvements - Unify shutdown callback handling - Suspend improvements - Power limit code cleanup - SR-IOV fixes - AUX backlight fixes - DCN 3.5 fixes - HDMI compliance fixes - DCN 4.0.1 cursor updates - DCN interrupt fix - DC KMS full update improvements - Add additional HDCP traces - DCN 3.2 fixes - DP MST fixes - Add support for new SR-IOV mailbox interface - UQ reset support - HDP flush rework - VCE1 support amdkfd: - HMM cleanups - Relax checks on save area overallocations - Fix GPU mappings after prefetch radeon: - refactor CIK/SI enablement xe: - Initial Xe3P support - panic support on VRAM for display - fix stolen size check - Loosen used tracking restriction - New SR-IOV debugfs structure and debugfs updates - Hide the GPU madvise flag behind a VM_BIND flag - Always expose VRAM provisioning data on discrete GPUs - Allow VRAM mappings for userptr when used with SVM - Allow pinning of p2p dma-buf - Use per-tile debugfs where appropriate - Add documentation for Execution Queues - PF improvements - VF migration recovery redesign work - User / Kernel VRAM partitioning - Update Tile-based messages - Allow configfs to disable specific GT types - VF provisioning and migration improvements - use SVM range helpers in PT layer - Initial CRI support - access VF registers using dedicated MMIO view - limit number of jobs per exec queue - add sriov_admin sysfs tree - more crescent island specific support - debugfs residency counter - SRIOV migration work - runtime registers for GFX 35 i915: - add initial Xe3p_LPD display version 35 support - Enable LNL+ content adaptive sharpness filter - Use optimized VRR guardband - Enable Xe3p LT PHY - enable FBC support for Xe3p_LPD display - add display 30.02 firmware support - refactor SKL+ watermark latency setup - refactor fbdev handling - call i915/xe runtime PM via function pointers - refactor i915/xe stolen memory/display interfaces - use display version instead of gfx version in display code - extend i915_display_info with Type-C port details - lots of display cleanups/refactorings - set O_LARGEFILE in __create_shmem - skuip guc communication warning on reset - fix time conversions - defeature DRRS on LNL+ - refactor intel_frontbuffer split between i915/xe/display - convert inteL_rom interfaces to struct drm_device - unify display register polling interfaces - aovid lock inversion when pinning to GGTT on CHV/BXT+VTD panel: - Add KD116N3730A08/A12, chromebook mt8189 - JT101TM023, LQ079L1SX01, - GLD070WX3-SL01 MIPI DSI - Samsung LTL106AL0, Samsung LTL106AL01 - Raystar RFF500F-AWH-DNN - Winstar WF70A8SYJHLNGA - Wanchanglong w552946aaa - Samsung SOFEF00 - Lenovo X13s panel - ilitek-ili9881c - add rpi 5" support - visionx-rm69299 - add backlight support - edp - support AUI B116XAN02.0 bridge: - improve ref counting - ti-sn65dsi86 - add support for DP mode with HPD - synopsis: support CEC, init timer with correct freq - ASL CS5263 DP-to-HDMI bridge support nova-core: - introduce bitfield! macro - introduce safe integer converters - GSP inits to fully booted state on Ampere - Use more future-proof register for GPU identification nova-drm: - select NOVA_CORE - 64-bit only nouveau: - improve reclocking on tegra 186+ - add large page and compression support msm: - GPU: - Gen8 support: A840 (Kaanapali) and X2-85 (Glymur) - A612 support - MDSS: - Added support for Glymur and QCS8300 platforms - DPU: - Enabled Quad-Pipe support, unlocking higher resolutions support - Added support for Glymur platform - Documented DPU on QCS8300 platform as supported - DisplayPort: - Added support for Glymur platform - Added support lame remapping inside DP block - Documented DisplayPort controller on QCS8300 and SM6150/QCS615 as supported tegra: - NVJPG driver panfrost: - display JM contexts over debugfs - export JM contexts to userspace - improve error and job handling panthor: - support custom ASN_HASH for mt8196 - support mali-G1 GPU - flush shmem write before mapping buffers uncached - make timeout per-queue instead of per-job mediatek: - MT8195/88 HDMIv2/DDCv2 support rockchip: - dsi: add support for RK3368 amdxdna: - enhance runtime PM - last hardware error reading uapi - support firmware debug output - add resource and telemetry data uapi - preemption support imx: - add driver for HDMI TX Parallel audio interface ivpu: - add support for user-managed preemption buffer - add userptr support - update JSM firware API to 3.33.0 - add better alloc/free warnings - fix page fault in unbind all bos - rework bind/unbind of imported buffers - enable MCA ECC signalling - split fw runtime and global memory buffers - add fdinfo memory statistics tidss: - convert to drm logging - logging cleanup ast: - refactor generation init paths - add per chip generation detect_tx_chip - set quirks for each chip model atmel-hlcdc: - set LCDC_ATTRE register in plane disable - set correct values for plane scaler solomon: - use drm helper for get_modes and move_valid sitronix: - fix output position when clearing screens qaic: - support dma-buf exports - support new firmware's READ_DATA implementation - sahara AIC200 image table update - add sysfs support - add coredump support - add uevents support - PM support sun4i: - layer refactors to decouple plane from output - improve DE33 support vc4: - switch to generic CEC helpers komeda: - use drm_ logging functions vkms: - configfs support for display configuration vgem: - fix fence timer deadlock etnaviv: - add HWDB entry for GC8000 Nano Ultra VIP r6205" * tag 'drm-next-2025-12-03' of https://gitlab.freedesktop.org/drm/kernel: (1869 commits) Revert "drm/amd: Skip power ungate during suspend for VPE" drm/amdgpu: use common defines for HUB faults drm/amdgpu/gmc12: add amdgpu_vm_handle_fault() handling drm/amdgpu/gmc11: add amdgpu_vm_handle_fault() handling drm/amdgpu: use static ids for ACP platform devs drm/amdgpu/sdma6: Update SDMA 6.0.3 FW version to include UMQ protected-fence fix drm/amdgpu: Forward VMID reservation errors drm/amdgpu/gmc8: Delegate VM faults to soft IRQ handler ring drm/amdgpu/gmc7: Delegate VM faults to soft IRQ handler ring drm/amdgpu/gmc6: Delegate VM faults to soft IRQ handler ring drm/amdgpu/gmc6: Cache VM fault info drm/amdgpu/gmc6: Don't print MC client as it's unknown drm/amdgpu/cz_ih: Enable soft IRQ handler ring drm/amdgpu/tonga_ih: Enable soft IRQ handler ring drm/amdgpu/iceland_ih: Enable soft IRQ handler ring drm/amdgpu/cik_ih: Enable soft IRQ handler ring drm/amdgpu/si_ih: Enable soft IRQ handler ring drm/amd/display: fix typo in display_mode_core_structs.h drm/amd/display: fix Smart Power OLED not working after S4 drm/amd/display: Move RGB-type check for audio sync to DCE HW sequence ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c343
1 files changed, 316 insertions, 27 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index f96beb96c75c..47a6ce4fdc74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -44,6 +44,18 @@
vf2pf_info->ucode_info[ucode].version = ver; \
} while (0)
+#define mmRCC_CONFIG_MEMSIZE 0xde3
+
+const char *amdgpu_virt_dynamic_crit_table_name[] = {
+ "IP DISCOVERY",
+ "VBIOS IMG",
+ "RAS TELEMETRY",
+ "DATA EXCHANGE",
+ "BAD PAGE INFO",
+ "INIT HEADER",
+ "LAST",
+};
+
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
/* By now all MMIO pages except mailbox are blocked */
@@ -150,9 +162,10 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
virt->ops->req_init_data(adev);
if (adev->virt.req_init_data_ver > 0)
- DRM_INFO("host supports REQ_INIT_DATA handshake\n");
+ dev_info(adev->dev, "host supports REQ_INIT_DATA handshake of critical_region_version %d\n",
+ adev->virt.req_init_data_ver);
else
- DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
+ dev_warn(adev->dev, "host doesn't support REQ_INIT_DATA handshake\n");
}
/**
@@ -205,12 +218,12 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
&adev->virt.mm_table.gpu_addr,
(void *)&adev->virt.mm_table.cpu_addr);
if (r) {
- DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
+ dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r);
return r;
}
memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
- DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
+ dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n",
adev->virt.mm_table.gpu_addr,
adev->virt.mm_table.cpu_addr);
return 0;
@@ -390,7 +403,9 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_GPU_PAGE_SIZE,
&bo, NULL))
- DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
+ dev_dbg(adev->dev,
+ "RAS WARN: reserve vram for retired page %llx fail\n",
+ bp);
data->bps_bo[i] = bo;
}
data->last_reserved = i + 1;
@@ -658,10 +673,34 @@ out:
schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
}
+static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
+{
+ uint32_t dataexchange_offset =
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
+ uint32_t dataexchange_size =
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
+ uint64_t pos = 0;
+
+ dev_info(adev->dev,
+ "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+ dataexchange_offset, dataexchange_size);
+
+ if (!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) {
+ dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n");
+ return -EINVAL;
+ }
+
+ pos = (uint64_t)dataexchange_offset;
+ amdgpu_device_vram_access(adev, pos, pfvf_data,
+ dataexchange_size, false);
+
+ return 0;
+}
+
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
{
if (adev->virt.vf2pf_update_interval_ms != 0) {
- DRM_INFO("clean up the vf2pf work item\n");
+ dev_info(adev->dev, "clean up the vf2pf work item\n");
cancel_delayed_work_sync(&adev->virt.vf2pf_work);
adev->virt.vf2pf_update_interval_ms = 0;
}
@@ -669,13 +708,15 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
{
+ uint32_t *pfvf_data = NULL;
+
adev->virt.fw_reserve.p_pf2vf = NULL;
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
adev->virt.vf2pf_update_retry_cnt = 0;
if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
- DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
+ dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!");
} else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
/* go through this logic in ip_init and reset to init workqueue*/
amdgpu_virt_exchange_data(adev);
@@ -684,11 +725,34 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
} else if (adev->bios != NULL) {
/* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
- adev->virt.fw_reserve.p_pf2vf =
- (struct amd_sriov_msg_pf2vf_info_header *)
- (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+ if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+ pfvf_data =
+ kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
+ GFP_KERNEL);
+ if (!pfvf_data) {
+ dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n");
+ return;
+ }
- amdgpu_virt_read_pf2vf_data(adev);
+ if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
+ goto free_pfvf_data;
+
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
+
+ amdgpu_virt_read_pf2vf_data(adev);
+
+free_pfvf_data:
+ kfree(pfvf_data);
+ pfvf_data = NULL;
+ adev->virt.fw_reserve.p_pf2vf = NULL;
+ } else {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+
+ amdgpu_virt_read_pf2vf_data(adev);
+ }
}
}
@@ -701,23 +765,38 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
if (adev->mman.fw_vram_usage_va) {
- adev->virt.fw_reserve.p_pf2vf =
- (struct amd_sriov_msg_pf2vf_info_header *)
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
- adev->virt.fw_reserve.p_vf2pf =
- (struct amd_sriov_msg_vf2pf_info_header *)
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
- adev->virt.fw_reserve.ras_telemetry =
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
+ if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
+ (AMD_SRIOV_MSG_SIZE_KB << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
+ } else {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+ }
} else if (adev->mman.drv_vram_usage_va) {
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
- (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
adev->virt.fw_reserve.p_vf2pf =
(struct amd_sriov_msg_vf2pf_info_header *)
- (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
adev->virt.fw_reserve.ras_telemetry =
- (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
}
amdgpu_virt_read_pf2vf_data(adev);
@@ -816,7 +895,7 @@ static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
break;
default: /* other chip doesn't support SRIOV */
is_sriov = false;
- DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
+ dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type);
break;
}
}
@@ -838,10 +917,220 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
RATELIMIT_MSG_ON_RELEASE);
mutex_init(&adev->virt.ras.ras_telemetry_mutex);
+ mutex_init(&adev->virt.access_req_mutex);
adev->virt.ras.cper_rptr = 0;
}
+static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t *buf_end)
+{
+ uint32_t sum = 0;
+
+ if (buf_start >= buf_end)
+ return 0;
+
+ for (; buf_start < buf_end; buf_start++)
+ sum += buf_start[0];
+
+ return 0xffffffff - sum;
+}
+
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
+{
+ struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
+ u64 init_hdr_offset = adev->virt.init_data_header.offset;
+ u64 init_hdr_size = (u64)adev->virt.init_data_header.size_kb << 10; /* KB → bytes */
+ u64 vram_size;
+ u64 end;
+ int r = 0;
+ uint8_t checksum = 0;
+
+ /* Skip below init if critical region version != v2 */
+ if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2)
+ return 0;
+
+ if (init_hdr_offset < 0) {
+ dev_err(adev->dev, "Invalid init header offset\n");
+ return -EINVAL;
+ }
+
+ vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+ if (!vram_size || vram_size == U32_MAX)
+ return -EINVAL;
+ vram_size <<= 20;
+
+ if (check_add_overflow(init_hdr_offset, init_hdr_size, &end) || end > vram_size) {
+ dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n");
+ return -EINVAL;
+ }
+
+ /* Allocate for init_data_hdr */
+ init_data_hdr = kzalloc(sizeof(struct amd_sriov_msg_init_data_header), GFP_KERNEL);
+ if (!init_data_hdr)
+ return -ENOMEM;
+
+ amdgpu_device_vram_access(adev, (uint64_t)init_hdr_offset, (uint32_t *)init_data_hdr,
+ sizeof(struct amd_sriov_msg_init_data_header), false);
+
+ /* Table validation */
+ if (strncmp(init_data_hdr->signature,
+ AMDGPU_SRIOV_CRIT_DATA_SIGNATURE,
+ AMDGPU_SRIOV_CRIT_DATA_SIG_LEN) != 0) {
+ dev_err(adev->dev, "Invalid init data signature: %.4s\n",
+ init_data_hdr->signature);
+ r = -EINVAL;
+ goto out;
+ }
+
+ checksum = amdgpu_virt_crit_region_calc_checksum(
+ (uint8_t *)&init_data_hdr->initdata_offset,
+ (uint8_t *)init_data_hdr +
+ sizeof(struct amd_sriov_msg_init_data_header));
+ if (checksum != init_data_hdr->checksum) {
+ dev_err(adev->dev, "Found unmatching checksum from calculation 0x%x and init_data 0x%x\n",
+ checksum, init_data_hdr->checksum);
+ r = -EINVAL;
+ goto out;
+ }
+
+ memset(&adev->virt.crit_regn, 0, sizeof(adev->virt.crit_regn));
+ memset(adev->virt.crit_regn_tbl, 0, sizeof(adev->virt.crit_regn_tbl));
+
+ adev->virt.crit_regn.offset = init_data_hdr->initdata_offset;
+ adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb;
+
+ /* Validation and initialization for each table entry */
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_IPD_TABLE_ID)) {
+ if (!init_data_hdr->ip_discovery_size_in_kb ||
+ init_data_hdr->ip_discovery_size_in_kb > DISCOVERY_TMR_SIZE) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_IPD_TABLE_ID],
+ init_data_hdr->ip_discovery_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset =
+ init_data_hdr->ip_discovery_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb =
+ init_data_hdr->ip_discovery_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID)) {
+ if (!init_data_hdr->vbios_img_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID],
+ init_data_hdr->vbios_img_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset =
+ init_data_hdr->vbios_img_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb =
+ init_data_hdr->vbios_img_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID)) {
+ if (!init_data_hdr->ras_tele_info_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID],
+ init_data_hdr->ras_tele_info_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset =
+ init_data_hdr->ras_tele_info_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb =
+ init_data_hdr->ras_tele_info_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID)) {
+ if (!init_data_hdr->dataexchange_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID],
+ init_data_hdr->dataexchange_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset =
+ init_data_hdr->dataexchange_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb =
+ init_data_hdr->dataexchange_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID)) {
+ if (!init_data_hdr->bad_page_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID],
+ init_data_hdr->bad_page_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset =
+ init_data_hdr->bad_page_info_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
+ init_data_hdr->bad_page_size_in_kb;
+ }
+
+ /* Validation for critical region info */
+ if (adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb > DISCOVERY_TMR_SIZE) {
+ dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n",
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* reserved memory starts from crit region base offset with the size of 5MB */
+ adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
+ adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
+ dev_info(adev->dev,
+ "critical region v%d requested to reserve memory start at %08llx with %llu KB.\n",
+ init_data_hdr->version,
+ adev->mman.fw_vram_usage_start_offset,
+ adev->mman.fw_vram_usage_size >> 10);
+
+ adev->virt.is_dynamic_crit_regn_enabled = true;
+
+out:
+ kfree(init_data_hdr);
+ init_data_hdr = NULL;
+
+ return r;
+}
+
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+ int data_id, uint8_t *binary, u32 *size)
+{
+ uint32_t data_offset = 0;
+ uint32_t data_size = 0;
+ enum amd_sriov_msg_table_id_enum data_table_id = data_id;
+
+ if (data_table_id >= AMD_SRIOV_MSG_MAX_TABLE_ID)
+ return -EINVAL;
+
+ data_offset = adev->virt.crit_regn_tbl[data_table_id].offset;
+ data_size = adev->virt.crit_regn_tbl[data_table_id].size_kb << 10;
+
+ /* Validate on input params */
+ if (!binary || !size || *size < (uint64_t)data_size)
+ return -EINVAL;
+
+ /* Proceed to copy the dynamic content */
+ amdgpu_device_vram_access(adev,
+ (uint64_t)data_offset, (uint32_t *)binary, data_size, false);
+ *size = (uint64_t)data_size;
+
+ dev_dbg(adev->dev,
+ "Got %s info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+ amdgpu_virt_dynamic_crit_table_name[data_id], data_offset, data_size);
+
+ return 0;
+}
+
void amdgpu_virt_init(struct amdgpu_device *adev)
{
bool is_sriov = false;
@@ -1289,7 +1578,7 @@ amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block bloc
case AMDGPU_RAS_BLOCK__MPIO:
return RAS_TELEMETRY_GPU_BLOCK_MPIO;
default:
- DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
+ dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n",
block);
return RAS_TELEMETRY_GPU_BLOCK_COUNT;
}
@@ -1304,7 +1593,7 @@ static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
checksum = host_telemetry->header.checksum;
used_size = host_telemetry->header.used_size;
- if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
return 0;
tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
@@ -1383,7 +1672,7 @@ amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
checksum = host_telemetry->header.checksum;
used_size = host_telemetry->header.used_size;
- if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
return -EINVAL;
cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
@@ -1515,7 +1804,7 @@ static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
checksum = host_telemetry->header.checksum;
used_size = host_telemetry->header.used_size;
- if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
return 0;
tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL);