summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c418
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c244
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c205
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c145
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c79
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c69
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c388
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c422
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c79
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c122
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c147
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c640
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c174
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c343
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h92
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_ih.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_ih.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/iceland_ih.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sid.h40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_ih.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v12_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v1_0.c839
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v1_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c5
104 files changed, 4458 insertions, 1380 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 1acfed2f92ef..7f515be5185d 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -43,14 +43,16 @@ config DRM_AMDGPU_SI
bool "Enable amdgpu support for SI parts"
depends on DRM_AMDGPU
help
- Choose this option if you want to enable experimental support
+ Choose this option if you want to enable support
for SI (Southern Islands) asics.
- SI is already supported in radeon. Experimental support for SI
- in amdgpu will be disabled by default and is still provided by
- radeon. Use module options to override this:
+ SI (Southern Islands) are first generation GCN GPUs,
+ supported by both drivers: radeon (old) and amdgpu (new).
+ By default, SI dedicated GPUs are supported by amdgpu.
- radeon.si_support=0 amdgpu.si_support=1
+ Use module options to override this:
+ To use radeon for SI,
+ radeon.si_support=1 amdgpu.si_support=0
config DRM_AMDGPU_CIK
bool "Enable amdgpu support for CIK parts"
@@ -59,11 +61,17 @@ config DRM_AMDGPU_CIK
Choose this option if you want to enable support for CIK (Sea
Islands) asics.
- CIK is already supported in radeon. Support for CIK in amdgpu
- will be disabled by default and is still provided by radeon.
- Use module options to override this:
+ CIK (Sea Islands) are second generation GCN GPUs,
+ supported by both drivers: radeon (old) and amdgpu (new).
+ By default,
+ CIK dedicated GPUs are supported by amdgpu
+ CIK APUs are supported by radeon
+ Use module options to override this:
+ To use amdgpu for CIK,
radeon.cik_support=0 amdgpu.cik_support=1
+ To use radeon for CIK,
+ radeon.cik_support=1 amdgpu.cik_support=0
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 64e7acff8f18..c88760fb52ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -37,7 +37,8 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH)/modules/inc \
-I$(FULL_AMD_DISPLAY_PATH)/dc \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
- -I$(FULL_AMD_PATH)/amdkfd
+ -I$(FULL_AMD_PATH)/amdkfd \
+ -I$(FULL_AMD_PATH)/ras/ras_mgr
# Locally disable W=1 warnings enabled in drm subsystem Makefile
subdir-ccflags-y += -Wno-override-init
@@ -77,7 +78,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o \
dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o \
- uvd_v3_1.o
+ uvd_v3_1.o vce_v1_0.o
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
@@ -324,4 +325,9 @@ amdgpu-y += \
isp_v4_1_1.o
endif
+AMD_GPU_RAS_PATH := ../ras
+AMD_GPU_RAS_FULL_PATH := $(FULL_AMD_PATH)/ras
+include $(AMD_GPU_RAS_FULL_PATH)/Makefile
+amdgpu-y += $(AMD_GPU_RAS_FILES)
+
obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 9569dc16dd3d..daa7b23bc775 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -88,6 +88,10 @@ static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev)
uint32_t ip_block;
int r, i;
+ /* Skip suspend of SDMA IP versions >= 4.4.2. They are multi-aid */
+ if (adev->aid_mask)
+ ip_block_mask &= ~BIT(AMD_IP_BLOCK_TYPE_SDMA);
+
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6f5b4a0e0a34..9f9774f58ce1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -372,13 +372,15 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
u64 *flags);
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
enum amd_ip_block_type block_type);
+bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type);
bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
enum amd_ip_block_type block_type);
int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block);
int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block);
-#define AMDGPU_MAX_IP_NUM 16
+#define AMDGPU_MAX_IP_NUM AMD_IP_BLOCK_TYPE_NUM
struct amdgpu_ip_block_status {
bool valid;
@@ -839,8 +841,6 @@ struct amd_powerplay {
const struct amd_pm_funcs *pp_funcs;
};
-struct ip_discovery_top;
-
/* polaris10 kickers */
#define ASICID_IS_P20(did, rid) (((did == 0x67DF) && \
((rid == 0xE3) || \
@@ -972,8 +972,7 @@ struct amdgpu_device {
struct notifier_block acpi_nb;
struct notifier_block pm_nb;
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
- struct debugfs_blob_wrapper debugfs_vbios_blob;
- struct debugfs_blob_wrapper debugfs_discovery_blob;
+ struct debugfs_blob_wrapper debugfs_vbios_blob;
struct mutex srbm_mutex;
/* GRBM index mutex. Protects concurrent access to GRBM index */
struct mutex grbm_idx_mutex;
@@ -1063,6 +1062,9 @@ struct amdgpu_device {
u32 log2_max_MBps;
} mm_stats;
+ /* discovery*/
+ struct amdgpu_discovery_info discovery;
+
/* display */
bool enable_virtual_display;
struct amdgpu_vkms_output *amdgpu_vkms_output;
@@ -1174,6 +1176,12 @@ struct amdgpu_device {
* queue fence.
*/
struct xarray userq_xa;
+ /**
+ * @userq_doorbell_xa: Global user queue map (doorbell index → queue)
+ * Key: doorbell_index (unique global identifier for the queue)
+ * Value: struct amdgpu_usermode_queue
+ */
+ struct xarray userq_doorbell_xa;
/* df */
struct amdgpu_df df;
@@ -1265,8 +1273,6 @@ struct amdgpu_device {
struct list_head ras_list;
- struct ip_discovery_top *ip_top;
-
struct amdgpu_reset_domain *reset_domain;
struct mutex benchmark_mutex;
@@ -1309,9 +1315,8 @@ struct amdgpu_device {
*/
bool apu_prefer_gtt;
- struct list_head userq_mgr_list;
- struct mutex userq_mutex;
bool userq_halt_for_enforce_isolation;
+ struct work_struct userq_reset_work;
struct amdgpu_uid *uid_info;
/* KFD
@@ -1535,11 +1540,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
#define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
-#define amdgpu_asic_flush_hdp(adev, r) \
- ((adev)->asic_funcs->flush_hdp ? (adev)->asic_funcs->flush_hdp((adev), (r)) : (adev)->hdp.funcs->flush_hdp((adev), (r)))
-#define amdgpu_asic_invalidate_hdp(adev, r) \
- ((adev)->asic_funcs->invalidate_hdp ? (adev)->asic_funcs->invalidate_hdp((adev), (r)) : \
- ((adev)->hdp.funcs->invalidate_hdp ? (adev)->hdp.funcs->invalidate_hdp((adev), (r)) : (void)0))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
@@ -1638,7 +1638,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
void amdgpu_driver_release_kms(struct drm_device *dev);
-int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
int amdgpu_device_prepare(struct drm_device *dev);
void amdgpu_device_complete(struct drm_device *dev);
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index 4926996f94da..381ef205b0df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -302,17 +302,19 @@ static int acp_hw_init(struct amdgpu_ip_block *ip_block)
adev->acp.acp_res[2].end = adev->acp.acp_res[2].start;
adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].id = 0;
adev->acp.acp_cell[0].num_resources = 3;
adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
adev->acp.acp_cell[0].platform_data = &adev->asic_type;
adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].id = 1;
adev->acp.acp_cell[1].num_resources = 1;
adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
- r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, 2);
+ r = mfd_add_devices(adev->acp.parent, 0, adev->acp.acp_cell, 2, NULL, 0, NULL);
if (r)
goto failure;
r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
@@ -410,30 +412,34 @@ static int acp_hw_init(struct amdgpu_ip_block *ip_block)
adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].id = 0;
adev->acp.acp_cell[0].num_resources = 5;
adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
adev->acp.acp_cell[0].platform_data = &adev->asic_type;
adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].id = 1;
adev->acp.acp_cell[1].num_resources = 1;
adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
adev->acp.acp_cell[2].name = "designware-i2s";
+ adev->acp.acp_cell[2].id = 2;
adev->acp.acp_cell[2].num_resources = 1;
adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
adev->acp.acp_cell[3].name = "designware-i2s";
+ adev->acp.acp_cell[3].id = 3;
adev->acp.acp_cell[3].num_resources = 1;
adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
- r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS);
+ r = mfd_add_devices(adev->acp.parent, 0, adev->acp.acp_cell, ACP_DEVS, NULL, 0, NULL);
if (r)
goto failure;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 6c62e27b9800..d31460a9e958 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -507,7 +507,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
pm_runtime_get_sync(adev_to_drm(adev)->dev);
/* Just fire off a uevent and let userspace tell us what to do */
drm_helper_hpd_irq_event(adev_to_drm(adev));
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 9e120c934cc1..8bdfcde2029b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -71,7 +71,7 @@ struct kgd_mem {
struct mutex lock;
struct amdgpu_bo *bo;
struct dma_buf *dmabuf;
- struct hmm_range *range;
+ struct amdgpu_hmm_range *range;
struct list_head attachments;
/* protected by amdkfd_process_info.lock */
struct list_head validate_list;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 923f0fa7350c..b1c24c8fa686 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1057,7 +1057,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
struct ttm_operation_ctx ctx = { true, false };
- struct hmm_range *range;
+ struct amdgpu_hmm_range *range;
int ret = 0;
mutex_lock(&process_info->lock);
@@ -1089,8 +1089,15 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
return 0;
}
- ret = amdgpu_ttm_tt_get_user_pages(bo, &range);
+ range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!range)) {
+ ret = -ENOMEM;
+ goto unregister_out;
+ }
+
+ ret = amdgpu_ttm_tt_get_user_pages(bo, range);
if (ret) {
+ amdgpu_hmm_range_free(range);
if (ret == -EAGAIN)
pr_debug("Failed to get user pages, try again\n");
else
@@ -1113,7 +1120,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
amdgpu_bo_unreserve(bo);
release_out:
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
+ amdgpu_hmm_range_free(range);
unregister_out:
if (ret)
amdgpu_hmm_unregister(bo);
@@ -1920,7 +1927,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
amdgpu_hmm_unregister(mem->bo);
mutex_lock(&process_info->notifier_lock);
- amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range);
+ amdgpu_hmm_range_free(mem->range);
mutex_unlock(&process_info->notifier_lock);
}
@@ -1958,9 +1965,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
*/
if (size) {
if (!is_imported &&
- (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
- (adev->apu_prefer_gtt &&
- mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
*size = bo_size;
else
*size = 0;
@@ -2546,7 +2551,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
bo = mem->bo;
- amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range);
+ amdgpu_hmm_range_free(mem->range);
mem->range = NULL;
/* BO reservations and getting user pages (hmm_range_fault)
@@ -2570,9 +2575,14 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
}
}
+ mem->range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!mem->range))
+ return -ENOMEM;
/* Get updated user pages */
- ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, mem->range);
if (ret) {
+ amdgpu_hmm_range_free(mem->range);
+ mem->range = NULL;
pr_debug("Failed %d to get user pages\n", ret);
/* Return -EFAULT bad address error as success. It will
@@ -2745,8 +2755,8 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
continue;
/* Only check mem with hmm range associated */
- valid = amdgpu_ttm_tt_get_user_pages_done(
- mem->bo->tbo.ttm, mem->range);
+ valid = amdgpu_hmm_range_valid(mem->range);
+ amdgpu_hmm_range_free(mem->range);
mem->range = NULL;
if (!valid) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index c7d32fb216e4..636385c80f64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -181,19 +181,22 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
u8 frev, crev;
int usage_bytes = 0;
- if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
- if (frev == 2 && crev == 1) {
- fw_usage_v2_1 =
- (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
- amdgpu_atomfirmware_allocate_fb_v2_1(adev,
- fw_usage_v2_1,
- &usage_bytes);
- } else if (frev >= 2 && crev >= 2) {
- fw_usage_v2_2 =
- (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
- amdgpu_atomfirmware_allocate_fb_v2_2(adev,
- fw_usage_v2_2,
- &usage_bytes);
+ /* Skip atomfirmware allocation for SRIOV VFs when dynamic crit regn is enabled */
+ if (!(amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled)) {
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
+ if (frev == 2 && crev == 1) {
+ fw_usage_v2_1 =
+ (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_1(adev,
+ fw_usage_v2_1,
+ &usage_bytes);
+ } else if (frev >= 2 && crev >= 2) {
+ fw_usage_v2_2 =
+ (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_2(adev,
+ fw_usage_v2_2,
+ &usage_bytes);
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 00e96419fcda..35d04e69aec0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -96,13 +96,14 @@ void amdgpu_bios_release(struct amdgpu_device *adev)
* part of the system bios. On boot, the system bios puts a
* copy of the igp rom at the start of vram if a discrete card is
* present.
- * For SR-IOV, the vbios image is also put in VRAM in the VF.
+ * For SR-IOV, if dynamic critical region is not enabled,
+ * the vbios image is also put at the start of VRAM in the VF.
*/
static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
{
- uint8_t __iomem *bios;
+ uint8_t __iomem *bios = NULL;
resource_size_t vram_base;
- resource_size_t size = 256 * 1024; /* ??? */
+ u32 size = 256U * 1024U; /* ??? */
if (!(adev->flags & AMD_IS_APU))
if (amdgpu_device_need_post(adev))
@@ -114,18 +115,33 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
adev->bios = NULL;
vram_base = pci_resource_start(adev->pdev, 0);
- bios = ioremap_wc(vram_base, size);
- if (!bios)
- return false;
adev->bios = kmalloc(size, GFP_KERNEL);
- if (!adev->bios) {
- iounmap(bios);
+ if (!adev->bios)
return false;
+
+ /* For SRIOV with dynamic critical region is enabled,
+ * the vbios image is put at a dynamic offset of VRAM in the VF.
+ * If dynamic critical region is disabled, follow the existing logic as on baremetal.
+ */
+ if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+ if (amdgpu_virt_get_dynamic_data_info(adev,
+ AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, adev->bios, &size)) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+ } else {
+ bios = ioremap_wc(vram_base, size);
+ if (!bios) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+
+ memcpy_fromio(adev->bios, bios, size);
+ iounmap(bios);
}
+
adev->bios_size = size;
- memcpy_fromio(adev->bios, bios, size);
- iounmap(bios);
if (!check_atom_bios(adev, size)) {
amdgpu_bios_release(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index a716c9886c74..2b5e7c46a39d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -38,7 +38,7 @@ struct amdgpu_bo_list_entry {
struct amdgpu_bo *bo;
struct amdgpu_bo_va *bo_va;
uint32_t priority;
- struct hmm_range *range;
+ struct amdgpu_hmm_range *range;
bool user_invalidated;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 47e9bfba0642..9f96d568acf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -734,10 +734,8 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
return ret;
}
@@ -919,10 +917,8 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
return ret;
}
@@ -1146,10 +1142,8 @@ out:
amdgpu_connector_update_scratch_regs(connector, ret);
exit:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
return ret;
}
@@ -1486,10 +1480,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
connector->connector_type == DRM_MODE_CONNECTOR_eDP)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2f6a96af7fb1..ecdfe6cb36cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -29,7 +29,6 @@
#include <linux/pagemap.h>
#include <linux/sync_file.h>
#include <linux/dma-buf.h>
-#include <linux/hmm.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
@@ -41,6 +40,7 @@
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"
+#include "amdgpu_hmm.h"
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
struct amdgpu_device *adev,
@@ -891,12 +891,17 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
bool userpage_invalidated = false;
struct amdgpu_bo *bo = e->bo;
- r = amdgpu_ttm_tt_get_user_pages(bo, &e->range);
+ e->range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!e->range))
+ return -ENOMEM;
+
+ r = amdgpu_ttm_tt_get_user_pages(bo, e->range);
if (r)
goto out_free_user_pages;
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
- if (bo->tbo.ttm->pages[i] != hmm_pfn_to_page(e->range->hmm_pfns[i])) {
+ if (bo->tbo.ttm->pages[i] !=
+ hmm_pfn_to_page(e->range->hmm_range.hmm_pfns[i])) {
userpage_invalidated = true;
break;
}
@@ -990,9 +995,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
out_free_user_pages:
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = e->bo;
-
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
+ amdgpu_hmm_range_free(e->range);
e->range = NULL;
}
mutex_unlock(&p->bo_list->bo_list_mutex);
@@ -1323,8 +1326,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
*/
r = 0;
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm,
- e->range);
+ r |= !amdgpu_hmm_range_valid(e->range);
+ amdgpu_hmm_range_free(e->range);
e->range = NULL;
}
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a70651050acf..62d43b8cbe58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -129,7 +129,6 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
if (use_bank) {
if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
(se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return -EINVAL;
@@ -179,7 +178,6 @@ end:
if (pm_pg_lock)
mutex_unlock(&adev->pm.mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
@@ -255,7 +253,6 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off
if (rd->id.use_grbm) {
if ((rd->id.grbm.sh != 0xFFFFFFFF && rd->id.grbm.sh >= adev->gfx.config.max_sh_per_se) ||
(rd->id.grbm.se != 0xFFFFFFFF && rd->id.grbm.se >= adev->gfx.config.max_shader_engines)) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
mutex_unlock(&rd->lock);
@@ -310,7 +307,6 @@ end:
mutex_unlock(&rd->lock);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
@@ -446,7 +442,6 @@ static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, siz
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, rd->id.xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
if (!x) {
@@ -557,7 +552,6 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
@@ -617,7 +611,6 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
@@ -676,7 +669,6 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
@@ -736,7 +728,6 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
@@ -795,7 +786,6 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
@@ -855,7 +845,6 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
@@ -1003,7 +992,6 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
if (r) {
@@ -1094,7 +1082,6 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
mutex_unlock(&adev->grbm_idx_mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
if (!x) {
@@ -1192,7 +1179,6 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
mutex_unlock(&adev->grbm_idx_mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
while (size) {
@@ -1266,7 +1252,6 @@ static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -1315,7 +1300,6 @@ static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -1365,7 +1349,6 @@ static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -1414,7 +1397,6 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -1460,7 +1442,6 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -1501,7 +1482,6 @@ static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *bu
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -1701,7 +1681,6 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
up_write(&adev->reset_domain->sem);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
@@ -1721,7 +1700,6 @@ static int amdgpu_debugfs_evict_vram(void *data, u64 *val)
*val = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
@@ -1742,7 +1720,6 @@ static int amdgpu_debugfs_evict_gtt(void *data, u64 *val)
*val = amdgpu_ttm_evict_resources(adev, TTM_PL_TT);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
@@ -1762,7 +1739,6 @@ static int amdgpu_debugfs_benchmark(void *data, u64 val)
r = amdgpu_benchmark(adev, val);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return r;
@@ -1902,7 +1878,7 @@ no_preempt:
continue;
}
job = to_amdgpu_job(s_job);
- if (preempted && (&job->hw_fence.base) == fence)
+ if (preempted && (&job->hw_fence->base) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
@@ -2014,7 +1990,6 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val)
ret = -EINVAL;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return ret;
@@ -2123,10 +2098,9 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
debugfs_create_blob("amdgpu_vbios", 0444, root,
&adev->debugfs_vbios_blob);
- adev->debugfs_discovery_blob.data = adev->mman.discovery_bin;
- adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size;
- debugfs_create_blob("amdgpu_discovery", 0444, root,
- &adev->debugfs_discovery_blob);
+ if (adev->discovery.debugfs_blob.size)
+ debugfs_create_blob("amdgpu_discovery", 0444, root,
+ &adev->discovery.debugfs_blob);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 96b6738e6252..a1817b4b5173 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -71,6 +71,7 @@
#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
+#include "amdgpu_ras_mgr.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"
@@ -179,6 +180,10 @@ struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
BIT(AMD_IP_BLOCK_TYPE_PSP)
};
+static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
+static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
+static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);
+
static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
@@ -2387,7 +2392,7 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
}
/**
- * amdgpu_device_ip_is_valid - is the hardware IP enabled
+ * amdgpu_device_ip_is_hw - is the hardware IP enabled
*
* @adev: amdgpu_device pointer
* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
@@ -2395,6 +2400,27 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
* Check if the hardware IP is enable or not.
* Returns true if it the IP is enable, false if not.
*/
+bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
+{
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == block_type)
+ return adev->ip_blocks[i].status.hw;
+ }
+ return false;
+}
+
+/**
+ * amdgpu_device_ip_is_valid - is the hardware IP valid
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Check if the hardware IP is valid or not.
+ * Returns true if it the IP is valid, false if not.
+ */
bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
enum amd_ip_block_type block_type)
{
@@ -2473,6 +2499,7 @@ static const char *ip_block_names[] = {
[AMD_IP_BLOCK_TYPE_VPE] = "vpe",
[AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm",
[AMD_IP_BLOCK_TYPE_ISP] = "isp",
+ [AMD_IP_BLOCK_TYPE_RAS] = "ras",
};
static const char *ip_block_name(struct amdgpu_device *adev, enum amd_ip_block_type type)
@@ -2633,12 +2660,12 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
chip_name = "arcturus";
break;
case CHIP_NAVI12:
- if (adev->mman.discovery_bin)
+ if (adev->discovery.bin)
return 0;
chip_name = "navi12";
break;
case CHIP_CYAN_SKILLFISH:
- if (adev->mman.discovery_bin)
+ if (adev->discovery.bin)
return 0;
chip_name = "cyan_skillfish";
break;
@@ -2763,6 +2790,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return r;
+
+ r = amdgpu_virt_init_critical_region(adev);
+ if (r)
+ return r;
}
switch (adev->asic_type) {
@@ -3652,6 +3683,20 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
"failed to release exclusive mode on fini\n");
}
+ /*
+ * Driver reload on the APU can fail due to firmware validation because
+ * the PSP is always running, as it is shared across the whole SoC.
+ * This same issue does not occur on dGPU because it has a mechanism
+ * that checks whether the PSP is running. A solution for those issues
+ * in the APU is to trigger a GPU reset, but this should be done during
+ * the unload phase to avoid adding boot latency and screen flicker.
+ */
+ if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) {
+ r = amdgpu_asic_reset(adev);
+ if (r)
+ dev_err(adev->dev, "asic reset on %s failed\n", __func__);
+ }
+
return 0;
}
@@ -3762,7 +3807,7 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
*/
static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
{
- int i, r;
+ int i, r, rec;
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
@@ -3783,13 +3828,25 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
continue;
- /* XXX handle errors */
r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
if (r)
- return r;
+ goto unwind;
}
return 0;
+unwind:
+ rec = amdgpu_device_ip_resume_phase3(adev);
+ if (rec)
+ dev_err(adev->dev,
+ "amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
+ rec);
+
+ amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW);
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+
+ return r;
}
/**
@@ -3805,7 +3862,7 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
*/
static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
{
- int i, r;
+ int i, r, rec;
if (adev->in_s0ix)
amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
@@ -3866,9 +3923,9 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
continue;
- /* XXX handle errors */
r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
- adev->ip_blocks[i].status.hw = false;
+ if (r)
+ goto unwind;
/* handle putting the SMC in the appropriate state */
if (!amdgpu_sriov_vf(adev)) {
@@ -3878,13 +3935,40 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
dev_err(adev->dev,
"SMC failed to set mp1 state %d, %d\n",
adev->mp1_state, r);
- return r;
+ goto unwind;
}
}
}
}
return 0;
+unwind:
+ /* suspend phase 2 = resume phase 1 + resume phase 2 */
+ rec = amdgpu_device_ip_resume_phase1(adev);
+ if (rec) {
+ dev_err(adev->dev,
+ "amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
+ rec);
+ return r;
+ }
+
+ rec = amdgpu_device_fw_loading(adev);
+ if (rec) {
+ dev_err(adev->dev,
+ "amdgpu_device_fw_loading failed during unwind: %d\n",
+ rec);
+ return r;
+ }
+
+ rec = amdgpu_device_ip_resume_phase2(adev);
+ if (rec) {
+ dev_err(adev->dev,
+ "amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
+ rec);
+ return r;
+ }
+
+ return r;
}
/**
@@ -3898,7 +3982,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
* in each IP into a state suitable for suspend.
* Returns 0 on success, negative error code on failure.
*/
-int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
+static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
{
int r;
@@ -4182,25 +4266,13 @@ bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
case CHIP_PITCAIRN:
case CHIP_VERDE:
case CHIP_OLAND:
- /*
- * We have systems in the wild with these ASICs that require
- * LVDS and VGA support which is not supported with DC.
- *
- * Fallback to the non-DC driver here by default so as not to
- * cause regressions.
- */
-#if defined(CONFIG_DRM_AMD_DC_SI)
- return amdgpu_dc > 0;
-#else
- return false;
-#endif
- case CHIP_BONAIRE:
+ return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
case CHIP_KAVERI:
case CHIP_KABINI:
case CHIP_MULLINS:
/*
* We have systems in the wild with these ASICs that require
- * VGA support which is not supported with DC.
+ * TRAVIS and NUTMEG support which is not supported with DC.
*
* Fallback to the non-DC driver here by default so as not to
* cause regressions.
@@ -4288,58 +4360,53 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
long timeout;
int ret = 0;
- /*
- * By default timeout for jobs is 10 sec
- */
- adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000);
- adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+ /* By default timeout for all queues is 2 sec */
+ adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
+ adev->video_timeout = msecs_to_jiffies(2000);
- if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
- while ((timeout_setting = strsep(&input, ",")) &&
- strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
- ret = kstrtol(timeout_setting, 0, &timeout);
- if (ret)
- return ret;
+ if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
+ return 0;
- if (timeout == 0) {
- index++;
- continue;
- } else if (timeout < 0) {
- timeout = MAX_SCHEDULE_TIMEOUT;
- dev_warn(adev->dev, "lockup timeout disabled");
- add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
- } else {
- timeout = msecs_to_jiffies(timeout);
- }
+ while ((timeout_setting = strsep(&input, ",")) &&
+ strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+ ret = kstrtol(timeout_setting, 0, &timeout);
+ if (ret)
+ return ret;
- switch (index++) {
- case 0:
- adev->gfx_timeout = timeout;
- break;
- case 1:
- adev->compute_timeout = timeout;
- break;
- case 2:
- adev->sdma_timeout = timeout;
- break;
- case 3:
- adev->video_timeout = timeout;
- break;
- default:
- break;
- }
+ if (timeout == 0) {
+ index++;
+ continue;
+ } else if (timeout < 0) {
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ dev_warn(adev->dev, "lockup timeout disabled");
+ add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
+ } else {
+ timeout = msecs_to_jiffies(timeout);
}
- /*
- * There is only one value specified and
- * it should apply to all non-compute jobs.
- */
- if (index == 1) {
- adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
- if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
- adev->compute_timeout = adev->gfx_timeout;
+
+ switch (index++) {
+ case 0:
+ adev->gfx_timeout = timeout;
+ break;
+ case 1:
+ adev->compute_timeout = timeout;
+ break;
+ case 2:
+ adev->sdma_timeout = timeout;
+ break;
+ case 3:
+ adev->video_timeout = timeout;
+ break;
+ default:
+ break;
}
}
+ /* When only one value specified apply it to all queues. */
+ if (index == 1)
+ adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
+ adev->video_timeout = timeout;
+
return ret;
}
@@ -4394,6 +4461,55 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
dev_info(adev->dev, "MCBP is enabled\n");
}
+static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_atombios_sysfs_init(adev);
+ if (r)
+ drm_err(&adev->ddev,
+ "registering atombios sysfs failed (%d).\n", r);
+
+ r = amdgpu_pm_sysfs_init(adev);
+ if (r)
+ dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
+
+ r = amdgpu_ucode_sysfs_init(adev);
+ if (r) {
+ adev->ucode_sysfs_en = false;
+ dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
+ } else
+ adev->ucode_sysfs_en = true;
+
+ r = amdgpu_device_attr_sysfs_init(adev);
+ if (r)
+ dev_err(adev->dev, "Could not create amdgpu device attr\n");
+
+ r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
+ if (r)
+ dev_err(adev->dev,
+ "Could not create amdgpu board attributes\n");
+
+ amdgpu_fru_sysfs_init(adev);
+ amdgpu_reg_state_sysfs_init(adev);
+ amdgpu_xcp_sysfs_init(adev);
+
+ return r;
+}
+
+static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
+{
+ if (adev->pm.sysfs_initialized)
+ amdgpu_pm_sysfs_fini(adev);
+ if (adev->ucode_sysfs_en)
+ amdgpu_ucode_sysfs_fini(adev);
+ amdgpu_device_attr_sysfs_fini(adev);
+ amdgpu_fru_sysfs_fini(adev);
+
+ amdgpu_reg_state_sysfs_fini(adev);
+ amdgpu_xcp_sysfs_fini(adev);
+}
+
/**
* amdgpu_device_init - initialize the driver
*
@@ -4493,7 +4609,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->gfx.userq_sch_mutex);
mutex_init(&adev->gfx.workload_profile_mutex);
mutex_init(&adev->vcn.workload_profile_mutex);
- mutex_init(&adev->userq_mutex);
amdgpu_device_init_apu_flags(adev);
@@ -4521,7 +4636,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(&adev->pm.od_kobj_list);
- INIT_LIST_HEAD(&adev->userq_mgr_list);
+ xa_init(&adev->userq_doorbell_xa);
INIT_DELAYED_WORK(&adev->delayed_init_work,
amdgpu_device_delayed_init_work_handler);
@@ -4544,6 +4659,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
+ INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);
adev->gfx.gfx_off_req_count = 1;
adev->gfx.gfx_off_residency = 0;
@@ -4817,39 +4933,14 @@ fence_driver_init:
flush_delayed_work(&adev->delayed_init_work);
}
+ if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
+ amdgpu_xgmi_reset_on_init(adev);
/*
* Place those sysfs registering after `late_init`. As some of those
* operations performed in `late_init` might affect the sysfs
* interfaces creating.
*/
- r = amdgpu_atombios_sysfs_init(adev);
- if (r)
- drm_err(&adev->ddev,
- "registering atombios sysfs failed (%d).\n", r);
-
- r = amdgpu_pm_sysfs_init(adev);
- if (r)
- dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
-
- r = amdgpu_ucode_sysfs_init(adev);
- if (r) {
- adev->ucode_sysfs_en = false;
- dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
- } else
- adev->ucode_sysfs_en = true;
-
- r = amdgpu_device_attr_sysfs_init(adev);
- if (r)
- dev_err(adev->dev, "Could not create amdgpu device attr\n");
-
- r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
- if (r)
- dev_err(adev->dev,
- "Could not create amdgpu board attributes\n");
-
- amdgpu_fru_sysfs_init(adev);
- amdgpu_reg_state_sysfs_init(adev);
- amdgpu_xcp_sysfs_init(adev);
+ r = amdgpu_device_sys_interface_init(adev);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
r = amdgpu_pmu_init(adev);
@@ -4877,9 +4968,6 @@ fence_driver_init:
if (px)
vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
- if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
- amdgpu_xgmi_reset_on_init(adev);
-
amdgpu_device_check_iommu_direct_map(adev);
adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
@@ -4971,15 +5059,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
}
amdgpu_fence_driver_hw_fini(adev);
- if (adev->pm.sysfs_initialized)
- amdgpu_pm_sysfs_fini(adev);
- if (adev->ucode_sysfs_en)
- amdgpu_ucode_sysfs_fini(adev);
- amdgpu_device_attr_sysfs_fini(adev);
- amdgpu_fru_sysfs_fini(adev);
-
- amdgpu_reg_state_sysfs_fini(adev);
- amdgpu_xcp_sysfs_fini(adev);
+ amdgpu_device_sys_interface_fini(adev);
/* disable ras feature must before hw fini */
amdgpu_ras_pre_fini(adev);
@@ -5054,7 +5134,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
- if (adev->mman.discovery_bin)
+ if (adev->discovery.bin)
amdgpu_discovery_fini(adev);
amdgpu_reset_put_reset_domain(adev->reset_domain);
@@ -5202,7 +5282,7 @@ void amdgpu_device_complete(struct drm_device *dev)
int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
{
struct amdgpu_device *adev = drm_to_adev(dev);
- int r = 0;
+ int r, rec;
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
@@ -5218,35 +5298,92 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
return r;
}
- if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3))
- dev_warn(adev->dev, "smart shift update failed\n");
+ r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
+ if (r)
+ goto unwind_sriov;
if (notify_clients)
- drm_client_dev_suspend(adev_to_drm(adev), false);
+ drm_client_dev_suspend(adev_to_drm(adev));
cancel_delayed_work_sync(&adev->delayed_init_work);
amdgpu_ras_suspend(adev);
- amdgpu_device_ip_suspend_phase1(adev);
+ r = amdgpu_device_ip_suspend_phase1(adev);
+ if (r)
+ goto unwind_smartshift;
amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
- amdgpu_userq_suspend(adev);
+ r = amdgpu_userq_suspend(adev);
+ if (r)
+ goto unwind_ip_phase1;
r = amdgpu_device_evict_resources(adev);
if (r)
- return r;
+ goto unwind_userq;
amdgpu_ttm_set_buffer_funcs_status(adev, false);
amdgpu_fence_driver_hw_fini(adev);
- amdgpu_device_ip_suspend_phase2(adev);
+ r = amdgpu_device_ip_suspend_phase2(adev);
+ if (r)
+ goto unwind_evict;
if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, false);
return 0;
+
+unwind_evict:
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ amdgpu_fence_driver_hw_init(adev);
+
+unwind_userq:
+ rec = amdgpu_userq_resume(adev);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
+ return r;
+ }
+ rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
+ return r;
+ }
+
+unwind_ip_phase1:
+ /* suspend phase 1 = resume phase 3 */
+ rec = amdgpu_device_ip_resume_phase3(adev);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
+ return r;
+ }
+
+unwind_smartshift:
+ rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
+ return r;
+ }
+
+ if (notify_clients)
+ drm_client_dev_resume(adev_to_drm(adev));
+
+ amdgpu_ras_resume(adev);
+
+unwind_sriov:
+ if (amdgpu_sriov_vf(adev)) {
+ rec = amdgpu_virt_request_full_gpu(adev, true);
+ if (rec) {
+ dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
+ return r;
+ }
+ }
+
+ adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;
+
+ return r;
}
static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
@@ -5352,7 +5489,7 @@ exit:
flush_delayed_work(&adev->delayed_init_work);
if (notify_clients)
- drm_client_dev_resume(adev_to_drm(adev), false);
+ drm_client_dev_resume(adev_to_drm(adev));
amdgpu_ras_resume(adev);
@@ -5808,11 +5945,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if (!amdgpu_ring_sched_ready(ring))
continue;
- /* Clear job fence from fence drv to avoid force_completion
- * leave NULL and vm flush fence in fence drv
- */
- amdgpu_fence_driver_clear_job_fences(ring);
-
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring);
}
@@ -5957,7 +6089,11 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
if (r)
goto out;
- drm_client_dev_resume(adev_to_drm(tmp_adev), false);
+ r = amdgpu_userq_post_reset(tmp_adev, vram_lost);
+ if (r)
+ goto out;
+
+ drm_client_dev_resume(adev_to_drm(tmp_adev));
/*
* The GPU enters bad state once faulty pages
@@ -6179,6 +6315,7 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev))
cancel_work(&adev->reset_work);
#endif
+ cancel_work(&adev->userq_reset_work);
if (adev->kfd.dev)
cancel_work(&adev->kfd.reset_work);
@@ -6292,13 +6429,15 @@ static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
*/
amdgpu_unregister_gpu_instance(tmp_adev);
- drm_client_dev_suspend(adev_to_drm(tmp_adev), false);
+ drm_client_dev_suspend(adev_to_drm(tmp_adev));
/* disable ras on ALL IPs */
if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
+ amdgpu_userq_pre_reset(tmp_adev);
+
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
@@ -6528,6 +6667,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
goto end_reset;
}
+ /* Cannot be called after locking reset domain */
+ amdgpu_ras_pre_reset(adev, &device_list);
+
/* We need to lock reset domain only once both for XGMI and single device */
amdgpu_device_recovery_get_reset_lock(adev, &device_list);
@@ -6541,7 +6683,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
*
* job->base holds a reference to parent fence
*/
- if (job && dma_fence_is_signaled(&job->hw_fence.base)) {
+ if (job && dma_fence_is_signaled(&job->hw_fence->base)) {
job_signaled = true;
dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
goto skip_hw_reset;
@@ -6558,6 +6700,7 @@ skip_sched_resume:
amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
reset_unlock:
amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+ amdgpu_ras_post_reset(adev, &device_list);
end_reset:
if (hive) {
mutex_unlock(&hive->hive_lock);
@@ -7285,10 +7428,17 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
if (adev->gmc.xgmi.connected_to_cpu)
return;
- if (ring && ring->funcs->emit_hdp_flush)
+ if (ring && ring->funcs->emit_hdp_flush) {
amdgpu_ring_emit_hdp_flush(ring);
- else
- amdgpu_asic_flush_hdp(adev, ring);
+ return;
+ }
+
+ if (!ring && amdgpu_sriov_runtime(adev)) {
+ if (!amdgpu_kiq_hdp_flush(adev))
+ return;
+ }
+
+ amdgpu_hdp_flush(adev, ring);
}
void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
@@ -7301,7 +7451,7 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
if (adev->gmc.xgmi.connected_to_cpu)
return;
- amdgpu_asic_invalidate_hdp(adev, ring);
+ amdgpu_hdp_invalidate(adev, ring);
}
int amdgpu_in_reset(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index dd7b2b796427..fa2a22dfa048 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -107,6 +107,7 @@
#include "vcn_v5_0_1.h"
#include "jpeg_v5_0_0.h"
#include "jpeg_v5_0_1.h"
+#include "amdgpu_ras_mgr.h"
#include "amdgpu_vpe.h"
#if defined(CONFIG_DRM_AMD_ISP)
@@ -254,9 +255,9 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev,
pos = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET;
/* This region is read-only and reserved from system use */
- discv_regn = memremap(pos, adev->mman.discovery_tmr_size, MEMREMAP_WC);
+ discv_regn = memremap(pos, adev->discovery.size, MEMREMAP_WC);
if (discv_regn) {
- memcpy(binary, discv_regn, adev->mman.discovery_tmr_size);
+ memcpy(binary, discv_regn, adev->discovery.size);
memunmap(discv_regn);
return 0;
}
@@ -298,10 +299,31 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
else
vram_size <<= 20;
+ /*
+ * If in VRAM, discovery TMR is marked for reservation. If it is in system mem,
+ * then it is not required to be reserved.
+ */
if (sz_valid) {
- uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
- amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
- adev->mman.discovery_tmr_size, false);
+ if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+ /* For SRIOV VFs with dynamic critical region enabled,
+ * we will get the IPD binary via below call.
+ * If dynamic critical is disabled, fall through to normal seq.
+ */
+ if (amdgpu_virt_get_dynamic_data_info(adev,
+ AMD_SRIOV_MSG_IPD_TABLE_ID, binary,
+ &adev->discovery.size)) {
+ dev_err(adev->dev,
+ "failed to read discovery info from dynamic critical region.");
+ ret = -EINVAL;
+ goto exit;
+ }
+ } else {
+ uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+
+ amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
+ adev->discovery.size, false);
+ adev->discovery.reserve_tmr = true;
+ }
} else {
ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
}
@@ -310,7 +332,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
dev_err(adev->dev,
"failed to read discovery info from memory, vram size read: %llx",
vram_size);
-
+exit:
return ret;
}
@@ -389,6 +411,7 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
struct binary_header *bhdr)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct table_info *info;
uint16_t checksum;
uint16_t offset;
@@ -398,14 +421,14 @@ static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
checksum = le16_to_cpu(info->checksum);
struct nps_info_header *nhdr =
- (struct nps_info_header *)(adev->mman.discovery_bin + offset);
+ (struct nps_info_header *)(discovery_bin + offset);
if (le32_to_cpu(nhdr->table_id) != NPS_INFO_TABLE_ID) {
dev_dbg(adev->dev, "invalid ip discovery nps info table id\n");
return -EINVAL;
}
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
le32_to_cpu(nhdr->size_bytes),
checksum)) {
dev_dbg(adev->dev, "invalid nps info data table checksum\n");
@@ -417,8 +440,11 @@ static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev)
{
- if (amdgpu_discovery == 2)
+ if (amdgpu_discovery == 2) {
+ /* Assume there is valid discovery TMR in VRAM even if binary is sideloaded */
+ adev->discovery.reserve_tmr = true;
return "amdgpu/ip_discovery.bin";
+ }
switch (adev->asic_type) {
case CHIP_VEGA10:
@@ -447,49 +473,53 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
struct table_info *info;
struct binary_header *bhdr;
+ uint8_t *discovery_bin;
const char *fw_name;
uint16_t offset;
uint16_t size;
uint16_t checksum;
int r;
- adev->mman.discovery_tmr_size = DISCOVERY_TMR_SIZE;
- adev->mman.discovery_bin = kzalloc(adev->mman.discovery_tmr_size, GFP_KERNEL);
- if (!adev->mman.discovery_bin)
+ adev->discovery.bin = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL);
+ if (!adev->discovery.bin)
return -ENOMEM;
+ adev->discovery.size = DISCOVERY_TMR_SIZE;
+ adev->discovery.debugfs_blob.data = adev->discovery.bin;
+ adev->discovery.debugfs_blob.size = adev->discovery.size;
+ discovery_bin = adev->discovery.bin;
/* Read from file if it is the preferred option */
fw_name = amdgpu_discovery_get_fw_name(adev);
if (fw_name != NULL) {
drm_dbg(&adev->ddev, "use ip discovery information from file");
- r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name);
+ r = amdgpu_discovery_read_binary_from_file(adev, discovery_bin,
+ fw_name);
if (r)
goto out;
} else {
drm_dbg(&adev->ddev, "use ip discovery information from memory");
- r = amdgpu_discovery_read_binary_from_mem(
- adev, adev->mman.discovery_bin);
+ r = amdgpu_discovery_read_binary_from_mem(adev, discovery_bin);
if (r)
goto out;
}
/* check the ip discovery binary signature */
- if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) {
+ if (!amdgpu_discovery_verify_binary_signature(discovery_bin)) {
dev_err(adev->dev,
"get invalid ip discovery binary signature\n");
r = -EINVAL;
goto out;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = offsetof(struct binary_header, binary_checksum) +
sizeof(bhdr->binary_checksum);
size = le16_to_cpu(bhdr->binary_size) - offset;
checksum = le16_to_cpu(bhdr->binary_checksum);
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- size, checksum)) {
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset, size,
+ checksum)) {
dev_err(adev->dev, "invalid ip discovery binary checksum\n");
r = -EINVAL;
goto out;
@@ -501,15 +531,16 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
if (offset) {
struct ip_discovery_header *ihdr =
- (struct ip_discovery_header *)(adev->mman.discovery_bin + offset);
+ (struct ip_discovery_header *)(discovery_bin + offset);
if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
dev_err(adev->dev, "invalid ip discovery data table signature\n");
r = -EINVAL;
goto out;
}
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- le16_to_cpu(ihdr->size), checksum)) {
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
+ le16_to_cpu(ihdr->size),
+ checksum)) {
dev_err(adev->dev, "invalid ip discovery data table checksum\n");
r = -EINVAL;
goto out;
@@ -522,7 +553,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
if (offset) {
struct gpu_info_header *ghdr =
- (struct gpu_info_header *)(adev->mman.discovery_bin + offset);
+ (struct gpu_info_header *)(discovery_bin + offset);
if (le32_to_cpu(ghdr->table_id) != GC_TABLE_ID) {
dev_err(adev->dev, "invalid ip discovery gc table id\n");
@@ -530,8 +561,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
goto out;
}
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- le32_to_cpu(ghdr->size), checksum)) {
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
+ le32_to_cpu(ghdr->size),
+ checksum)) {
dev_err(adev->dev, "invalid gc data table checksum\n");
r = -EINVAL;
goto out;
@@ -544,7 +576,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
if (offset) {
struct harvest_info_header *hhdr =
- (struct harvest_info_header *)(adev->mman.discovery_bin + offset);
+ (struct harvest_info_header *)(discovery_bin + offset);
if (le32_to_cpu(hhdr->signature) != HARVEST_TABLE_SIGNATURE) {
dev_err(adev->dev, "invalid ip discovery harvest table signature\n");
@@ -552,8 +584,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
goto out;
}
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- sizeof(struct harvest_table), checksum)) {
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ sizeof(struct harvest_table), checksum)) {
dev_err(adev->dev, "invalid harvest data table checksum\n");
r = -EINVAL;
goto out;
@@ -566,7 +599,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
if (offset) {
struct vcn_info_header *vhdr =
- (struct vcn_info_header *)(adev->mman.discovery_bin + offset);
+ (struct vcn_info_header *)(discovery_bin + offset);
if (le32_to_cpu(vhdr->table_id) != VCN_INFO_TABLE_ID) {
dev_err(adev->dev, "invalid ip discovery vcn table id\n");
@@ -574,8 +607,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
goto out;
}
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- le32_to_cpu(vhdr->size_bytes), checksum)) {
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ le32_to_cpu(vhdr->size_bytes), checksum)) {
dev_err(adev->dev, "invalid vcn data table checksum\n");
r = -EINVAL;
goto out;
@@ -588,7 +622,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
if (0 && offset) {
struct mall_info_header *mhdr =
- (struct mall_info_header *)(adev->mman.discovery_bin + offset);
+ (struct mall_info_header *)(discovery_bin + offset);
if (le32_to_cpu(mhdr->table_id) != MALL_INFO_TABLE_ID) {
dev_err(adev->dev, "invalid ip discovery mall table id\n");
@@ -596,8 +630,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
goto out;
}
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- le32_to_cpu(mhdr->size_bytes), checksum)) {
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ le32_to_cpu(mhdr->size_bytes), checksum)) {
dev_err(adev->dev, "invalid mall data table checksum\n");
r = -EINVAL;
goto out;
@@ -607,8 +642,8 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
return 0;
out:
- kfree(adev->mman.discovery_bin);
- adev->mman.discovery_bin = NULL;
+ kfree(adev->discovery.bin);
+ adev->discovery.bin = NULL;
if ((amdgpu_discovery != 2) &&
(RREG32(mmIP_DISCOVERY_VERSION) == 4))
amdgpu_ras_query_boot_status(adev, 4);
@@ -620,8 +655,8 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev);
void amdgpu_discovery_fini(struct amdgpu_device *adev)
{
amdgpu_discovery_sysfs_fini(adev);
- kfree(adev->mman.discovery_bin);
- adev->mman.discovery_bin = NULL;
+ kfree(adev->discovery.bin);
+ adev->discovery.bin = NULL;
}
static int amdgpu_discovery_validate_ip(struct amdgpu_device *adev,
@@ -646,6 +681,7 @@ static int amdgpu_discovery_validate_ip(struct amdgpu_device *adev,
static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
uint32_t *vcn_harvest_count)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
@@ -655,21 +691,21 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
uint8_t inst;
int i, j;
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
num_dies = le16_to_cpu(ihdr->num_dies);
/* scan harvest bit of all IP data structures */
for (i = 0; i < num_dies; i++) {
die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
- dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
num_ips = le16_to_cpu(dhdr->num_ips);
ip_offset = die_offset + sizeof(*dhdr);
for (j = 0; j < num_ips; j++) {
- ip = (struct ip *)(adev->mman.discovery_bin +
- ip_offset);
+ ip = (struct ip *)(discovery_bin + ip_offset);
inst = ip->number_instance;
hw_id = le16_to_cpu(ip->hw_id);
if (amdgpu_discovery_validate_ip(adev, inst, hw_id))
@@ -711,13 +747,14 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
uint32_t *vcn_harvest_count,
uint32_t *umc_harvest_count)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
struct harvest_table *harvest_info;
u16 offset;
int i;
uint32_t umc_harvest_config = 0;
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset);
if (!offset) {
@@ -725,7 +762,7 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
return;
}
- harvest_info = (struct harvest_table *)(adev->mman.discovery_bin + offset);
+ harvest_info = (struct harvest_table *)(discovery_bin + offset);
for (i = 0; i < 32; i++) {
if (le16_to_cpu(harvest_info->list[i].hw_id) == 0)
@@ -1021,8 +1058,8 @@ static void ip_disc_release(struct kobject *kobj)
kobj);
struct amdgpu_device *adev = ip_top->adev;
- adev->ip_top = NULL;
kfree(ip_top);
+ adev->discovery.ip_top = NULL;
}
static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev,
@@ -1062,6 +1099,7 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
const size_t _ip_offset, const int num_ips,
bool reg_base_64)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
int ii, jj, kk, res;
uint16_t hw_id;
uint8_t inst;
@@ -1079,7 +1117,7 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
struct ip_v4 *ip;
struct ip_hw_instance *ip_hw_instance;
- ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
+ ip = (struct ip_v4 *)(discovery_bin + ip_offset);
inst = ip->instance_number;
hw_id = le16_to_cpu(ip->hw_id);
if (amdgpu_discovery_validate_ip(adev, inst, hw_id) ||
@@ -1166,17 +1204,20 @@ next_ip:
static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
{
+ struct ip_discovery_top *ip_top = adev->discovery.ip_top;
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
- struct kset *die_kset = &adev->ip_top->die_kset;
+ struct kset *die_kset = &ip_top->die_kset;
u16 num_dies, die_offset, num_ips;
size_t ip_offset;
int ii, res;
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
num_dies = le16_to_cpu(ihdr->num_dies);
DRM_DEBUG("number of dies: %d\n", num_dies);
@@ -1185,7 +1226,7 @@ static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
struct ip_die_entry *ip_die_entry;
die_offset = le16_to_cpu(ihdr->die_info[ii].die_offset);
- dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
num_ips = le16_to_cpu(dhdr->num_ips);
ip_offset = die_offset + sizeof(*dhdr);
@@ -1219,30 +1260,32 @@ static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct ip_discovery_top *ip_top;
struct kset *die_kset;
int res, ii;
- if (!adev->mman.discovery_bin)
+ if (!discovery_bin)
return -EINVAL;
- adev->ip_top = kzalloc(sizeof(*adev->ip_top), GFP_KERNEL);
- if (!adev->ip_top)
+ ip_top = kzalloc(sizeof(*ip_top), GFP_KERNEL);
+ if (!ip_top)
return -ENOMEM;
- adev->ip_top->adev = adev;
-
- res = kobject_init_and_add(&adev->ip_top->kobj, &ip_discovery_ktype,
+ ip_top->adev = adev;
+ adev->discovery.ip_top = ip_top;
+ res = kobject_init_and_add(&ip_top->kobj, &ip_discovery_ktype,
&adev->dev->kobj, "ip_discovery");
if (res) {
DRM_ERROR("Couldn't init and add ip_discovery/");
goto Err;
}
- die_kset = &adev->ip_top->die_kset;
+ die_kset = &ip_top->die_kset;
kobject_set_name(&die_kset->kobj, "%s", "die");
- die_kset->kobj.parent = &adev->ip_top->kobj;
+ die_kset->kobj.parent = &ip_top->kobj;
die_kset->kobj.ktype = &die_kobj_ktype;
- res = kset_register(&adev->ip_top->die_kset);
+ res = kset_register(&ip_top->die_kset);
if (res) {
DRM_ERROR("Couldn't register die_kset");
goto Err;
@@ -1256,7 +1299,7 @@ static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev)
return res;
Err:
- kobject_put(&adev->ip_top->kobj);
+ kobject_put(&ip_top->kobj);
return res;
}
@@ -1301,10 +1344,11 @@ static void amdgpu_discovery_sysfs_die_free(struct ip_die_entry *ip_die_entry)
static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
{
+ struct ip_discovery_top *ip_top = adev->discovery.ip_top;
struct list_head *el, *tmp;
struct kset *die_kset;
- die_kset = &adev->ip_top->die_kset;
+ die_kset = &ip_top->die_kset;
spin_lock(&die_kset->list_lock);
list_for_each_prev_safe(el, tmp, &die_kset->list) {
list_del_init(el);
@@ -1313,8 +1357,8 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
spin_lock(&die_kset->list_lock);
}
spin_unlock(&die_kset->list_lock);
- kobject_put(&adev->ip_top->die_kset.kobj);
- kobject_put(&adev->ip_top->kobj);
+ kobject_put(&ip_top->die_kset.kobj);
+ kobject_put(&ip_top->kobj);
}
/* ================================================== */
@@ -1325,6 +1369,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
+ uint8_t *discovery_bin;
struct ip_v4 *ip;
uint16_t die_offset;
uint16_t ip_offset;
@@ -1340,22 +1385,23 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
r = amdgpu_discovery_init(adev);
if (r)
return r;
-
+ discovery_bin = adev->discovery.bin;
wafl_ver = 0;
adev->gfx.xcc_mask = 0;
adev->sdma.sdma_mask = 0;
adev->vcn.inst_mask = 0;
adev->jpeg.inst_mask = 0;
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
num_dies = le16_to_cpu(ihdr->num_dies);
DRM_DEBUG("number of dies: %d\n", num_dies);
for (i = 0; i < num_dies; i++) {
die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
- dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
num_ips = le16_to_cpu(dhdr->num_ips);
ip_offset = die_offset + sizeof(*dhdr);
@@ -1369,7 +1415,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
le16_to_cpu(dhdr->die_id), num_ips);
for (j = 0; j < num_ips; j++) {
- ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
+ ip = (struct ip_v4 *)(discovery_bin + ip_offset);
inst = ip->instance_number;
hw_id = le16_to_cpu(ip->hw_id);
@@ -1519,16 +1565,16 @@ next_ip:
static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct ip_discovery_header *ihdr;
struct binary_header *bhdr;
int vcn_harvest_count = 0;
int umc_harvest_count = 0;
uint16_t offset, ihdr_ver;
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset);
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
- offset);
+ ihdr = (struct ip_discovery_header *)(discovery_bin + offset);
ihdr_ver = le16_to_cpu(ihdr->version);
/*
* Harvest table does not fit Navi1x and legacy GPUs,
@@ -1575,22 +1621,23 @@ union gc_info {
static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
union gc_info *gc_info;
u16 offset;
- if (!adev->mman.discovery_bin) {
+ if (!discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = le16_to_cpu(bhdr->table_list[GC].offset);
if (!offset)
return 0;
- gc_info = (union gc_info *)(adev->mman.discovery_bin + offset);
+ gc_info = (union gc_info *)(discovery_bin + offset);
switch (le16_to_cpu(gc_info->v1.header.version_major)) {
case 1:
@@ -1683,24 +1730,25 @@ union mall_info {
static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
union mall_info *mall_info;
u32 u, mall_size_per_umc, m_s_present, half_use;
u64 mall_size;
u16 offset;
- if (!adev->mman.discovery_bin) {
+ if (!discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = le16_to_cpu(bhdr->table_list[MALL_INFO].offset);
if (!offset)
return 0;
- mall_info = (union mall_info *)(adev->mman.discovery_bin + offset);
+ mall_info = (union mall_info *)(discovery_bin + offset);
switch (le16_to_cpu(mall_info->v1.header.version_major)) {
case 1:
@@ -1739,12 +1787,13 @@ union vcn_info {
static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
union vcn_info *vcn_info;
u16 offset;
int v;
- if (!adev->mman.discovery_bin) {
+ if (!discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
@@ -1759,13 +1808,13 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
return -EINVAL;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = le16_to_cpu(bhdr->table_list[VCN_INFO].offset);
if (!offset)
return 0;
- vcn_info = (union vcn_info *)(adev->mman.discovery_bin + offset);
+ vcn_info = (union vcn_info *)(discovery_bin + offset);
switch (le16_to_cpu(vcn_info->v1.header.version_major)) {
case 1:
@@ -1825,6 +1874,7 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
struct amdgpu_gmc_memrange **ranges,
int *range_cnt, bool refresh)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct amdgpu_gmc_memrange *mem_ranges;
struct binary_header *bhdr;
union nps_info *nps_info;
@@ -1841,13 +1891,13 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
return r;
nps_info = &nps_data;
} else {
- if (!adev->mman.discovery_bin) {
+ if (!discovery_bin) {
dev_err(adev->dev,
"fetch mem range failed, ip discovery uninitialized\n");
return -EINVAL;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset);
if (!offset)
@@ -1857,8 +1907,7 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
if (amdgpu_discovery_verify_npsinfo(adev, bhdr))
return -ENOENT;
- nps_info =
- (union nps_info *)(adev->mman.discovery_bin + offset);
+ nps_info = (union nps_info *)(discovery_bin + offset);
}
switch (le16_to_cpu(nps_info->v1.header.version_major)) {
@@ -2361,6 +2410,21 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
amdgpu_ip_version(adev, SDMA0_HWIP, 0));
return -EINVAL;
}
+
+ return 0;
+}
+
+static int amdgpu_discovery_set_ras_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ amdgpu_device_ip_block_add(adev, &ras_v1_0_ip_block);
+ break;
+ default:
+ break;
+ }
return 0;
}
@@ -3141,6 +3205,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
if (r)
return r;
+ r = amdgpu_discovery_set_ras_ip_blocks(adev);
+ if (r)
+ return r;
+
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
!amdgpu_sriov_vf(adev)) ||
(adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index b44d56465c5b..4ce04486cc31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -24,9 +24,21 @@
#ifndef __AMDGPU_DISCOVERY__
#define __AMDGPU_DISCOVERY__
+#include <linux/debugfs.h>
+
#define DISCOVERY_TMR_SIZE (10 << 10)
#define DISCOVERY_TMR_OFFSET (64 << 10)
+struct ip_discovery_top;
+
+struct amdgpu_discovery_info {
+ struct debugfs_blob_wrapper debugfs_blob;
+ struct ip_discovery_top *ip_top;
+ uint32_t size;
+ uint8_t *bin;
+ bool reserve_tmr;
+};
+
void amdgpu_discovery_fini(struct amdgpu_device *adev);
int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 51bab32fd8c6..b5d34797d606 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -332,8 +332,6 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
if (crtc->enabled)
active = true;
- pm_runtime_mark_last_busy(dev->dev);
-
adev = drm_to_adev(dev);
/* if we have active crtcs and we don't have a power ref,
* take the current one
@@ -1365,6 +1363,64 @@ static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = {
{ AMDGPU_FMT_DITHER_ENABLE, "on" },
};
+/**
+ * DOC: property for adaptive backlight modulation
+ *
+ * The 'adaptive backlight modulation' property is used for the compositor to
+ * directly control the adaptive backlight modulation power savings feature
+ * that is part of DCN hardware.
+ *
+ * The property will be attached specifically to eDP panels that support it.
+ *
+ * The property is by default set to 'sysfs' to allow the sysfs file 'panel_power_savings'
+ * to be able to control it.
+ * If set to 'off' the compositor will ensure it stays off.
+ * The other values 'min', 'bias min', 'bias max', and 'max' will control the
+ * intensity of the power savings.
+ *
+ * Modifying this value can have implications on color accuracy, so tread
+ * carefully.
+ */
+static int amdgpu_display_setup_abm_prop(struct amdgpu_device *adev)
+{
+ const struct drm_prop_enum_list props[] = {
+ { ABM_SYSFS_CONTROL, "sysfs" },
+ { ABM_LEVEL_OFF, "off" },
+ { ABM_LEVEL_MIN, "min" },
+ { ABM_LEVEL_BIAS_MIN, "bias min" },
+ { ABM_LEVEL_BIAS_MAX, "bias max" },
+ { ABM_LEVEL_MAX, "max" },
+ };
+ struct drm_property *prop;
+ int i;
+
+ if (!adev->dc_enabled)
+ return 0;
+
+ prop = drm_property_create(adev_to_drm(adev), DRM_MODE_PROP_ENUM,
+ "adaptive backlight modulation",
+ 6);
+ if (!prop)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(props); i++) {
+ int ret;
+
+ ret = drm_property_add_enum(prop, props[i].type,
+ props[i].name);
+
+ if (ret) {
+ drm_property_destroy(adev_to_drm(adev), prop);
+
+ return ret;
+ }
+ }
+
+ adev->mode_info.abm_level_property = prop;
+
+ return 0;
+}
+
int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
{
int sz;
@@ -1411,7 +1467,7 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
"dither",
amdgpu_dither_enum_list, sz);
- return 0;
+ return amdgpu_display_setup_abm_prop(adev);
}
void amdgpu_display_update_priority(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 930c171473b4..49a29bf47a37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -55,4 +55,11 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev);
int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
struct drm_scanout_buffer *sb);
+#define ABM_SYSFS_CONTROL -1
+#define ABM_LEVEL_OFF 0
+#define ABM_LEVEL_MIN 1
+#define ABM_LEVEL_BIAS_MIN 2
+#define ABM_LEVEL_BIAS_MAX 3
+#define ABM_LEVEL_MAX 4
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index ed3bef1edfe4..e22cfa7c6d32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -81,6 +81,19 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ int r;
+
+ /*
+ * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+.
+ * Such buffers cannot be safely accessed over P2P due to device-local
+ * compression metadata. Fallback to system-memory path instead.
+ * Device supports GFX12 (GC 12.x or newer)
+ * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag
+ *
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) &&
+ bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
+ attach->peer2peer = false;
/*
* Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+.
@@ -98,8 +111,14 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
attach->peer2peer = false;
+ r = dma_resv_lock(bo->tbo.base.resv, NULL);
+ if (r)
+ return r;
+
amdgpu_vm_bo_update_shared(bo);
+ dma_resv_unlock(bo->tbo.base.resv);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 7333e19291cf..2dfbddcef9ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -312,7 +312,7 @@ module_param_named(moverate, amdgpu_moverate, int, 0600);
* DOC: audio (int)
* Set HDMI/DPAudio. Only affects non-DC display handling. The default is -1 (Enabled), set 0 to disabled it.
*/
-MODULE_PARM_DESC(audio, "Audio enable (-1 = auto, 0 = disable, 1 = enable)");
+MODULE_PARM_DESC(audio, "HDMI/DP Audio enable for non DC displays (-1 = auto, 0 = disable, 1 = enable)");
module_param_named(audio, amdgpu_audio, int, 0444);
/**
@@ -354,22 +354,16 @@ module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint
* DOC: lockup_timeout (string)
* Set GPU scheduler timeout value in ms.
*
- * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
- * multiple values specified. 0 and negative values are invalidated. They will be adjusted
- * to the default timeout.
+ * The format can be [single value] for setting all timeouts at once or
+ * [GFX,Compute,SDMA,Video] to set individual timeouts.
+ * Negative values mean infinity.
*
- * - With one value specified, the setting will apply to all non-compute jobs.
- * - With multiple values specified, the first one will be for GFX.
- * The second one is for Compute. The third and fourth ones are
- * for SDMA and Video.
- *
- * By default(with no lockup_timeout settings), the timeout for all jobs is 10000.
+ * By default(with no lockup_timeout settings), the timeout for all queues is 2000.
*/
MODULE_PARM_DESC(lockup_timeout,
- "GPU lockup timeout in ms (default: 10000 for all jobs. "
- "0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
- "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
-module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
+ "GPU lockup timeout in ms (default: 2000. 0: keep default value. negative: infinity timeout), format: [single value for all] or [GFX,Compute,SDMA,Video].");
+module_param_string(lockup_timeout, amdgpu_lockup_timeout,
+ sizeof(amdgpu_lockup_timeout), 0444);
/**
* DOC: dpm (int)
@@ -624,39 +618,39 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644);
/**
* DOC: si_support (int)
- * Set SI support driver. This parameter works after set config CONFIG_DRM_AMDGPU_SI. For SI asic, when radeon driver is enabled,
- * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available,
- * otherwise using amdgpu driver.
- */
+ * 1 = enabled, 0 = disabled, -1 = default
+ *
+ * SI (Southern Islands) are first generation GCN GPUs, supported by both
+ * drivers: radeon (old) and amdgpu (new). This parameter controls whether
+ * amdgpu should support SI.
+ * By default, SI dedicated GPUs are supported by amdgpu.
+ * Only relevant when CONFIG_DRM_AMDGPU_SI is enabled to build SI support in amdgpu.
+ * See also radeon.si_support which should be disabled when amdgpu.si_support is
+ * enabled, and vice versa.
+ */
+int amdgpu_si_support = -1;
#ifdef CONFIG_DRM_AMDGPU_SI
-
-#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE)
-int amdgpu_si_support;
-MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))");
-#else
-int amdgpu_si_support = 1;
-MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)");
-#endif
-
+MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled, -1 = default)");
module_param_named(si_support, amdgpu_si_support, int, 0444);
#endif
/**
* DOC: cik_support (int)
- * Set CIK support driver. This parameter works after set config CONFIG_DRM_AMDGPU_CIK. For CIK asic, when radeon driver is enabled,
- * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available,
- * otherwise using amdgpu driver.
- */
+ * 1 = enabled, 0 = disabled, -1 = default
+ *
+ * CIK (Sea Islands) are second generation GCN GPUs, supported by both
+ * drivers: radeon (old) and amdgpu (new). This parameter controls whether
+ * amdgpu should support CIK.
+ * By default:
+ * - CIK dedicated GPUs are supported by amdgpu.
+ * - CIK APUs are supported by radeon (except when radeon is not built).
+ * Only relevant when CONFIG_DRM_AMDGPU_CIK is enabled to build CIK support in amdgpu.
+ * See also radeon.cik_support which should be disabled when amdgpu.cik_support is
+ * enabled, and vice versa.
+ */
+int amdgpu_cik_support = -1;
#ifdef CONFIG_DRM_AMDGPU_CIK
-
-#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE)
-int amdgpu_cik_support;
-MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))");
-#else
-int amdgpu_cik_support = 1;
-MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)");
-#endif
-
+MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled, -1 = default)");
module_param_named(cik_support, amdgpu_cik_support, int, 0444);
#endif
@@ -2234,7 +2228,6 @@ static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
adev->pdev->bus->number, i);
if (p) {
pm_runtime_get_sync(&p->dev);
- pm_runtime_mark_last_busy(&p->dev);
pm_runtime_put_autosuspend(&p->dev);
pci_dev_put(p);
}
@@ -2313,6 +2306,72 @@ static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long fl
return flags;
}
+static bool amdgpu_support_enabled(struct device *dev,
+ const enum amd_asic_type family)
+{
+ const char *gen;
+ const char *param;
+ int module_param = -1;
+ bool radeon_support_built = IS_ENABLED(CONFIG_DRM_RADEON);
+ bool amdgpu_support_built = false;
+ bool support_by_default = false;
+
+ switch (family) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ gen = "SI";
+ param = "si_support";
+ module_param = amdgpu_si_support;
+ amdgpu_support_built = IS_ENABLED(CONFIG_DRM_AMDGPU_SI);
+ support_by_default = true;
+ break;
+
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ support_by_default = true;
+ fallthrough;
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ gen = "CIK";
+ param = "cik_support";
+ module_param = amdgpu_cik_support;
+ amdgpu_support_built = IS_ENABLED(CONFIG_DRM_AMDGPU_CIK);
+ break;
+
+ default:
+ /* All other chips are supported by amdgpu only */
+ return true;
+ }
+
+ if (!amdgpu_support_built) {
+ dev_info(dev, "amdgpu built without %s support\n", gen);
+ return false;
+ }
+
+ if ((module_param == -1 && (support_by_default || !radeon_support_built)) ||
+ module_param == 1) {
+ if (radeon_support_built)
+ dev_info(dev, "%s support provided by amdgpu.\n"
+ "Use radeon.%s=1 amdgpu.%s=0 to override.\n",
+ gen, param, param);
+
+ return true;
+ }
+
+ if (radeon_support_built)
+ dev_info(dev, "%s support provided by radeon.\n"
+ "Use radeon.%s=0 amdgpu.%s=1 to override.\n",
+ gen, param, param);
+ else if (module_param == 0)
+ dev_info(dev, "%s support disabled by module param\n", gen);
+
+ return false;
+}
+
static int amdgpu_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
@@ -2360,48 +2419,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENOTSUPP;
}
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_OLAND:
- case CHIP_HAINAN:
-#ifdef CONFIG_DRM_AMDGPU_SI
- if (!amdgpu_si_support) {
- dev_info(&pdev->dev,
- "SI support provided by radeon.\n");
- dev_info(&pdev->dev,
- "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
- );
- return -ENODEV;
- }
- break;
-#else
- dev_info(&pdev->dev, "amdgpu is built without SI support.\n");
- return -ENODEV;
-#endif
- case CHIP_KAVERI:
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_KABINI:
- case CHIP_MULLINS:
-#ifdef CONFIG_DRM_AMDGPU_CIK
- if (!amdgpu_cik_support) {
- dev_info(&pdev->dev,
- "CIK support provided by radeon.\n");
- dev_info(&pdev->dev,
- "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
- );
- return -ENODEV;
- }
- break;
-#else
- dev_info(&pdev->dev, "amdgpu is built without CIK support.\n");
+ if (!amdgpu_support_enabled(&pdev->dev, flags & AMD_ASIC_MASK))
return -ENODEV;
-#endif
- default:
- break;
- }
adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);
if (IS_ERR(adev))
@@ -2480,7 +2499,6 @@ retry_init:
pm_runtime_allow(ddev->dev);
- pm_runtime_mark_last_busy(ddev->dev);
pm_runtime_put_autosuspend(ddev->dev);
pci_wake_from_d3(pdev, TRUE);
@@ -2564,7 +2582,8 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
*/
if (!amdgpu_passthrough(adev))
adev->mp1_state = PP_MP1_STATE_UNLOAD;
- amdgpu_device_ip_suspend(adev);
+ amdgpu_device_prepare(dev);
+ amdgpu_device_suspend(dev, true);
adev->mp1_state = PP_MP1_STATE_NONE;
}
@@ -2782,22 +2801,8 @@ static int amdgpu_runtime_idle_check_userq(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
- struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
- int queue_id;
- int ret = 0;
-
- mutex_lock(&adev->userq_mutex);
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
- idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- ret = -EBUSY;
- goto done;
- }
- }
-done:
- mutex_unlock(&adev->userq_mutex);
- return ret;
+ return xa_empty(&adev->userq_doorbell_xa) ? 0 : -EBUSY;
}
static int amdgpu_pmops_runtime_suspend(struct device *dev)
@@ -2944,7 +2949,6 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
ret = amdgpu_runtime_idle_check_userq(dev);
done:
- pm_runtime_mark_last_busy(dev);
pm_runtime_autosuspend(dev);
return ret;
}
@@ -2980,7 +2984,6 @@ long amdgpu_drm_ioctl(struct file *filp,
ret = drm_ioctl(filp, cmd, arg);
- pm_runtime_mark_last_busy(dev->dev);
out:
pm_runtime_put_autosuspend(dev->dev);
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 18a7829122d2..c7843e336310 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -45,16 +45,11 @@
* Cast helper
*/
static const struct dma_fence_ops amdgpu_fence_ops;
-static const struct dma_fence_ops amdgpu_job_fence_ops;
static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
{
struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
- if (__f->base.ops == &amdgpu_fence_ops ||
- __f->base.ops == &amdgpu_job_fence_ops)
- return __f;
-
- return NULL;
+ return __f;
}
/**
@@ -98,51 +93,32 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
* amdgpu_fence_emit - emit a fence on the requested ring
*
* @ring: ring the fence is associated with
- * @f: resulting fence object
* @af: amdgpu fence input
* @flags: flags to pass into the subordinate .emit_fence() call
*
* Emits a fence command on the requested ring (all asics).
* Returns 0 on success, -ENOMEM on failure.
*/
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
- struct amdgpu_fence *af, unsigned int flags)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+ unsigned int flags)
{
struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence;
- struct amdgpu_fence *am_fence;
struct dma_fence __rcu **ptr;
uint32_t seq;
int r;
- if (!af) {
- /* create a separate hw fence */
- am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL);
- if (!am_fence)
- return -ENOMEM;
- } else {
- am_fence = af;
- }
- fence = &am_fence->base;
- am_fence->ring = ring;
+ fence = &af->base;
+ af->ring = ring;
seq = ++ring->fence_drv.sync_seq;
- am_fence->seq = seq;
- if (af) {
- dma_fence_init(fence, &amdgpu_job_fence_ops,
- &ring->fence_drv.lock,
- adev->fence_context + ring->idx, seq);
- /* Against remove in amdgpu_job_{free, free_cb} */
- dma_fence_get(fence);
- } else {
- dma_fence_init(fence, &amdgpu_fence_ops,
- &ring->fence_drv.lock,
- adev->fence_context + ring->idx, seq);
- }
+ dma_fence_init(fence, &amdgpu_fence_ops,
+ &ring->fence_drv.lock,
+ adev->fence_context + ring->idx, seq);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, flags | AMDGPU_FENCE_FLAG_INT);
- amdgpu_fence_save_wptr(fence);
+ amdgpu_fence_save_wptr(af);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -167,8 +143,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
*/
rcu_assign_pointer(*ptr, dma_fence_get(fence));
- *f = fence;
-
return 0;
}
@@ -276,7 +250,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
drv->signalled_wptr = am_fence->wptr;
dma_fence_signal(fence);
dma_fence_put(fence);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
} while (last_seq != seq);
@@ -670,36 +643,6 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
}
/**
- * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring
- *
- * @ring: fence of the ring to be cleared
- *
- */
-void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
-{
- int i;
- struct dma_fence *old, **ptr;
-
- for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
- ptr = &ring->fence_drv.fences[i];
- old = rcu_dereference_protected(*ptr, 1);
- if (old && old->ops == &amdgpu_job_fence_ops) {
- struct amdgpu_job *job;
-
- /* For non-scheduler bad job, i.e. failed ib test, we need to signal
- * it right here or we won't be able to track them in fence_drv
- * and they will remain unsignaled during sa_bo free.
- */
- job = container_of(old, struct amdgpu_job, hw_fence.base);
- if (!job->base.s_fence && !dma_fence_is_signaled(old))
- dma_fence_signal(old);
- RCU_INIT_POINTER(*ptr, NULL);
- dma_fence_put(old);
- }
- }
-}
-
-/**
* amdgpu_fence_driver_set_error - set error code on fences
* @ring: the ring which contains the fences
* @error: the error code to set
@@ -755,7 +698,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
/**
* amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence
*
- * @fence: fence of the ring to signal
+ * @af: fence of the ring to signal
*
*/
void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
@@ -792,15 +735,13 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
} while (last_seq != seq);
spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
/* signal the guilty fence */
- amdgpu_fence_write(ring, af->seq);
+ amdgpu_fence_write(ring, (u32)af->base.seqno);
amdgpu_fence_process(ring);
}
-void amdgpu_fence_save_wptr(struct dma_fence *fence)
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
{
- struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base);
-
- am_fence->wptr = am_fence->ring->wptr;
+ af->wptr = af->ring->wptr;
}
static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
@@ -866,13 +807,6 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
return (const char *)to_amdgpu_fence(f)->ring->name;
}
-static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
-{
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
-
- return (const char *)to_amdgpu_ring(job->base.sched)->name;
-}
-
/**
* amdgpu_fence_enable_signaling - enable signalling on fence
* @f: fence
@@ -890,23 +824,6 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
}
/**
- * amdgpu_job_fence_enable_signaling - enable signalling on job fence
- * @f: fence
- *
- * This is the simliar function with amdgpu_fence_enable_signaling above, it
- * only handles the job embedded fence.
- */
-static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
-{
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
-
- if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
- amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
-
- return true;
-}
-
-/**
* amdgpu_fence_free - free up the fence memory
*
* @rcu: RCU callback head
@@ -922,21 +839,6 @@ static void amdgpu_fence_free(struct rcu_head *rcu)
}
/**
- * amdgpu_job_fence_free - free up the job with embedded fence
- *
- * @rcu: RCU callback head
- *
- * Free up the job with embedded fence after the RCU grace period.
- */
-static void amdgpu_job_fence_free(struct rcu_head *rcu)
-{
- struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
-
- /* free job if fence has a parent job */
- kfree(container_of(f, struct amdgpu_job, hw_fence.base));
-}
-
-/**
* amdgpu_fence_release - callback that fence can be freed
*
* @f: fence
@@ -949,19 +851,6 @@ static void amdgpu_fence_release(struct dma_fence *f)
call_rcu(&f->rcu, amdgpu_fence_free);
}
-/**
- * amdgpu_job_fence_release - callback that job embedded fence can be freed
- *
- * @f: fence
- *
- * This is the simliar function with amdgpu_fence_release above, it
- * only handles the job embedded fence.
- */
-static void amdgpu_job_fence_release(struct dma_fence *f)
-{
- call_rcu(&f->rcu, amdgpu_job_fence_free);
-}
-
static const struct dma_fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name,
@@ -969,13 +858,6 @@ static const struct dma_fence_ops amdgpu_fence_ops = {
.release = amdgpu_fence_release,
};
-static const struct dma_fence_ops amdgpu_job_fence_ops = {
- .get_driver_name = amdgpu_fence_get_driver_name,
- .get_timeline_name = amdgpu_job_fence_get_timeline_name,
- .enable_signaling = amdgpu_job_fence_enable_signaling,
- .release = amdgpu_job_fence_release,
-};
-
/*
* Fence debugfs
*/
@@ -1045,7 +927,6 @@ static int gpu_recover_get(void *data, u64 *val)
*val = atomic_read(&adev->reset_domain->reset_res);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index b2033f8352f5..d2237ce9da70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -302,7 +302,6 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int pages)
{
unsigned t;
- unsigned p;
int i, j;
u64 page_base;
/* Starting from VEGA10, system bit must be 0 to mean invalid. */
@@ -316,8 +315,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
return;
t = offset / AMDGPU_GPU_PAGE_SIZE;
- p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- for (i = 0; i < pages; i++, p++) {
+ for (i = 0; i < pages; i++) {
page_base = adev->dummy_page_addr;
if (!adev->gart.ptr)
continue;
@@ -370,6 +368,42 @@ void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
}
/**
+ * amdgpu_gart_map_vram_range - map VRAM pages into the GART page table
+ *
+ * @adev: amdgpu_device pointer
+ * @pa: physical address of the first page to be mapped
+ * @start_page: first page to map in the GART aperture
+ * @num_pages: number of pages to be mapped
+ * @flags: page table entry flags
+ * @dst: CPU address of the GART table
+ *
+ * Binds a BO that is allocated in VRAM to the GART page table
+ * (all ASICs).
+ *
+ * Useful when a kernel BO is located in VRAM but
+ * needs to be accessed from the GART address space.
+ */
+void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
+ uint64_t start_page, uint64_t num_pages,
+ uint64_t flags, void *dst)
+{
+ u32 i, idx;
+
+ /* The SYSTEM flag indicates the pages aren't in VRAM. */
+ WARN_ON_ONCE(flags & AMDGPU_PTE_SYSTEM);
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+
+ for (i = 0; i < num_pages; ++i) {
+ amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
+ start_page + i, pa + AMDGPU_GPU_PAGE_SIZE * i, flags);
+ }
+
+ drm_dev_exit(idx);
+}
+
+/**
* amdgpu_gart_bind - bind pages into the gart page table
*
* @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index 7cc980bf4725..d3118275ddae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -64,5 +64,8 @@ void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
void *dst);
void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
int pages, dma_addr_t *dma_addr, uint64_t flags);
+void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
+ uint64_t start_page, uint64_t num_pages,
+ uint64_t flags, void *dst);
void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index b7ebae289bea..3e38c5db2987 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -198,7 +198,7 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
amdgpu_hmm_unregister(aobj);
- ttm_bo_put(&aobj->tbo);
+ ttm_bo_fini(&aobj->tbo);
}
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
@@ -531,7 +531,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
struct drm_amdgpu_gem_userptr *args = data;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct drm_gem_object *gobj;
- struct hmm_range *range;
+ struct amdgpu_hmm_range *range;
struct amdgpu_bo *bo;
uint32_t handle;
int r;
@@ -572,10 +572,14 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
goto release_object;
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
- r = amdgpu_ttm_tt_get_user_pages(bo, &range);
- if (r)
+ range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!range))
+ return -ENOMEM;
+ r = amdgpu_ttm_tt_get_user_pages(bo, range);
+ if (r) {
+ amdgpu_hmm_range_free(range);
goto release_object;
-
+ }
r = amdgpu_bo_reserve(bo, true);
if (r)
goto user_pages_done;
@@ -597,8 +601,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
user_pages_done:
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
-
+ amdgpu_hmm_range_free(range);
release_object:
drm_gem_object_put(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index ebe2b4c68b0f..8b118c53f351 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -33,6 +33,7 @@
#include "amdgpu_reset.h"
#include "amdgpu_xcp.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_mes.h"
#include "nvd.h"
/* delay 0.1 second to enable gfx off feature */
@@ -1194,6 +1195,75 @@ failed_kiq_write:
dev_err(adev->dev, "failed to write reg:%x\n", reg);
}
+int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (adev->enable_mes_kiq && adev->mes.ring[0].sched.ready)
+ return amdgpu_mes_hdp_flush(adev);
+
+ if (!ring->funcs->emit_hdp_flush) {
+ return -EOPNOTSUPP;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_ring_alloc(ring, 32);
+ if (r)
+ goto failed_unlock;
+
+ amdgpu_ring_emit_hdp_flush(ring);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for gpu reset case because this way may
+ * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which cause
+ * gpu_recover() hang there.
+ *
+ * also don't wait anymore for IRQ context
+ * */
+ if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ goto failed_kiq_hdp_flush;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_hdp_flush;
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY) {
+ dev_err(adev->dev, "failed to flush HDP via KIQ timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_hdp_flush:
+ dev_err(adev->dev, "failed to flush HDP via KIQ\n");
+ return r < 0 ? r : -EIO;
+}
+
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
{
if (amdgpu_num_kcq == -1) {
@@ -1600,7 +1670,6 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
ret = amdgpu_gfx_run_cleaner_shader(adev, value);
- pm_runtime_mark_last_busy(ddev->dev);
pm_runtime_put_autosuspend(ddev->dev);
if (ret)
@@ -2485,3 +2554,4 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
&amdgpu_debugfs_compute_sched_mask_fops);
#endif
}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index fb5f7a0ee029..efd61a1ccc66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -615,6 +615,7 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id);
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id);
+int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev);
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 55097ca10738..727342689d4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -86,6 +86,11 @@ enum amdgpu_memory_partition {
#define AMDGPU_MAX_MEM_RANGES 8
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY 0x80
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_READ 0x40
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE 0x20
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_EXE 0x10
+
/*
* GMC page fault information
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 0760e70402ec..895c1e4c6747 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -284,6 +284,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size);
start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
+ start += amdgpu_vce_required_gart_pages(adev);
size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
drm_mm_init(&mgr->mm, start, size);
spin_lock_init(&mgr->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index 6e02fb9ac2f6..5a60d69a3e1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -66,3 +66,19 @@ void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
0);
}
}
+
+void amdgpu_hdp_invalidate(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (adev->asic_funcs && adev->asic_funcs->invalidate_hdp)
+ adev->asic_funcs->invalidate_hdp(adev, ring);
+ else if (adev->hdp.funcs && adev->hdp.funcs->invalidate_hdp)
+ adev->hdp.funcs->invalidate_hdp(adev, ring);
+}
+
+void amdgpu_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (adev->asic_funcs && adev->asic_funcs->flush_hdp)
+ adev->asic_funcs->flush_hdp(adev, ring);
+ else if (adev->hdp.funcs && adev->hdp.funcs->flush_hdp)
+ adev->hdp.funcs->flush_hdp(adev, ring);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 4cfd932b7e91..d9f488fa76b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -46,4 +46,8 @@ struct amdgpu_hdp {
int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev);
void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
+void amdgpu_hdp_invalidate(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+void amdgpu_hdp_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
#endif /* __AMDGPU_HDP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index 2c6a6b858112..90d26d820bac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -168,17 +168,13 @@ void amdgpu_hmm_unregister(struct amdgpu_bo *bo)
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
uint64_t start, uint64_t npages, bool readonly,
void *owner,
- struct hmm_range **phmm_range)
+ struct amdgpu_hmm_range *range)
{
- struct hmm_range *hmm_range;
unsigned long end;
unsigned long timeout;
unsigned long *pfns;
int r = 0;
-
- hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
- if (unlikely(!hmm_range))
- return -ENOMEM;
+ struct hmm_range *hmm_range = &range->hmm_range;
pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
if (unlikely(!pfns)) {
@@ -221,28 +217,77 @@ retry:
hmm_range->start = start;
hmm_range->hmm_pfns = pfns;
- *phmm_range = hmm_range;
-
return 0;
out_free_pfns:
kvfree(pfns);
+ hmm_range->hmm_pfns = NULL;
out_free_range:
- kfree(hmm_range);
-
if (r == -EBUSY)
r = -EAGAIN;
return r;
}
-bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
+/**
+ * amdgpu_hmm_range_valid - check if an HMM range is still valid
+ * @range: pointer to the &struct amdgpu_hmm_range to validate
+ *
+ * Determines whether the given HMM range @range is still valid by
+ * checking for invalidations via the MMU notifier sequence. This is
+ * typically used to verify that the range has not been invalidated
+ * by concurrent address space updates before it is accessed.
+ *
+ * Return:
+ * * true if @range is valid and can be used safely
+ * * false if @range is NULL or has been invalidated
+ */
+bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range)
{
- bool r;
+ if (!range)
+ return false;
- r = mmu_interval_read_retry(hmm_range->notifier,
- hmm_range->notifier_seq);
- kvfree(hmm_range->hmm_pfns);
- kfree(hmm_range);
+ return !mmu_interval_read_retry(range->hmm_range.notifier,
+ range->hmm_range.notifier_seq);
+}
- return r;
+/**
+ * amdgpu_hmm_range_alloc - allocate and initialize an AMDGPU HMM range
+ * @bo: optional buffer object to associate with this HMM range
+ *
+ * Allocates memory for amdgpu_hmm_range and associates it with the @bo passed.
+ * The reference count of the @bo is incremented.
+ *
+ * Return:
+ * Pointer to a newly allocated struct amdgpu_hmm_range on success,
+ * or NULL if memory allocation fails.
+ */
+struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo)
+{
+ struct amdgpu_hmm_range *range;
+
+ range = kzalloc(sizeof(*range), GFP_KERNEL);
+ if (!range)
+ return NULL;
+
+ range->bo = amdgpu_bo_ref(bo);
+ return range;
+}
+
+/**
+ * amdgpu_hmm_range_free - release an AMDGPU HMM range
+ * @range: pointer to the range object to free
+ *
+ * Releases all resources held by @range, including the associated
+ * hmm_pfns and the dropping reference of associated bo if any.
+ *
+ * Return: void
+ */
+void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range)
+{
+ if (!range)
+ return;
+
+ kvfree(range->hmm_range.hmm_pfns);
+ amdgpu_bo_unref(&range->bo);
+ kfree(range);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
index 953e1d06de20..140bc9cd57b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
@@ -31,13 +31,20 @@
#include <linux/interval_tree.h>
#include <linux/mmu_notifier.h>
+struct amdgpu_hmm_range {
+ struct hmm_range hmm_range;
+ struct amdgpu_bo *bo;
+};
+
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
uint64_t start, uint64_t npages, bool readonly,
void *owner,
- struct hmm_range **phmm_range);
-bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
+ struct amdgpu_hmm_range *range);
#if defined(CONFIG_HMM_MIRROR)
+bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range);
+struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo);
+void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range);
int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_hmm_unregister(struct amdgpu_bo *bo);
#else
@@ -47,7 +54,20 @@ static inline int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
"add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
return -ENODEV;
}
+
static inline void amdgpu_hmm_unregister(struct amdgpu_bo *bo) {}
+
+static inline bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range)
+{
+ return false;
+}
+
+static inline struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo)
+{
+ return NULL;
+}
+
+static inline void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range) {}
#endif
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 7d9bcb72e8dd..586a58facca1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -149,17 +149,19 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
if (job) {
vm = job->vm;
fence_ctx = job->base.s_fence ?
- job->base.s_fence->scheduled.context : 0;
+ job->base.s_fence->finished.context : 0;
shadow_va = job->shadow_va;
csa_va = job->csa_va;
gds_va = job->gds_va;
init_shadow = job->init_shadow;
- af = &job->hw_fence;
+ af = job->hw_fence;
/* Save the context of the job for reset handling.
* The driver needs this so it can skip the ring
* contents for guilty contexts.
*/
- af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
+ af->context = fence_ctx;
+ /* the vm fence is also part of the job's context */
+ job->hw_vm_fence->context = fence_ctx;
} else {
vm = NULL;
fence_ctx = 0;
@@ -167,23 +169,28 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
csa_va = 0;
gds_va = 0;
init_shadow = false;
- af = NULL;
+ af = kzalloc(sizeof(*af), GFP_ATOMIC);
+ if (!af)
+ return -ENOMEM;
}
if (!ring->sched.ready) {
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
- return -EINVAL;
+ r = -EINVAL;
+ goto free_fence;
}
if (vm && !job->vmid) {
dev_err(adev->dev, "VM IB without ID\n");
- return -EINVAL;
+ r = -EINVAL;
+ goto free_fence;
}
if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
(!ring->funcs->secure_submission_supported)) {
dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
- return -EINVAL;
+ r = -EINVAL;
+ goto free_fence;
}
alloc_size = ring->funcs->emit_frame_size + num_ibs *
@@ -192,7 +199,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
r = amdgpu_ring_alloc(ring, alloc_size);
if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
- return r;
+ goto free_fence;
}
need_ctx_switch = ring->current_ctx != fence_ctx;
@@ -289,7 +296,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
}
- r = amdgpu_fence_emit(ring, f, af, fence_flags);
+ r = amdgpu_fence_emit(ring, af, fence_flags);
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
if (job && job->vmid)
@@ -297,6 +304,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
amdgpu_ring_undo(ring);
return r;
}
+ *f = &af->base;
+ /* get a ref for the job */
+ if (job)
+ dma_fence_get(*f);
if (ring->funcs->insert_end)
ring->funcs->insert_end(ring);
@@ -317,12 +328,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
* fence so we know what rings contents to backup
* after we reset the queue.
*/
- amdgpu_fence_save_wptr(*f);
+ amdgpu_fence_save_wptr(af);
amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
return 0;
+
+free_fence:
+ if (!job)
+ kfree(af);
+ return r;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 3ef5bc95642c..9cab36322c16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -201,58 +201,34 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
- struct dma_fence **fences;
- unsigned i;
+ /* If anybody is waiting for a VMID let everybody wait for fairness */
if (!dma_fence_is_signaled(ring->vmid_wait)) {
*fence = dma_fence_get(ring->vmid_wait);
return 0;
}
- fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_NOWAIT);
- if (!fences)
- return -ENOMEM;
-
/* Check if we have an idle VMID */
- i = 0;
- list_for_each_entry((*idle), &id_mgr->ids_lru, list) {
+ list_for_each_entry_reverse((*idle), &id_mgr->ids_lru, list) {
/* Don't use per engine and per process VMID at the same time */
struct amdgpu_ring *r = adev->vm_manager.concurrent_flush ?
NULL : ring;
- fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, r);
- if (!fences[i])
- break;
- ++i;
+ *fence = amdgpu_sync_peek_fence(&(*idle)->active, r);
+ if (!(*fence))
+ return 0;
}
- /* If we can't find a idle VMID to use, wait till one becomes available */
- if (&(*idle)->list == &id_mgr->ids_lru) {
- u64 fence_context = adev->vm_manager.fence_context + ring->idx;
- unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
- struct dma_fence_array *array;
- unsigned j;
-
- *idle = NULL;
- for (j = 0; j < i; ++j)
- dma_fence_get(fences[j]);
-
- array = dma_fence_array_create(i, fences, fence_context,
- seqno, true);
- if (!array) {
- for (j = 0; j < i; ++j)
- dma_fence_put(fences[j]);
- kfree(fences);
- return -ENOMEM;
- }
-
- *fence = dma_fence_get(&array->base);
- dma_fence_put(ring->vmid_wait);
- ring->vmid_wait = &array->base;
- return 0;
- }
- kfree(fences);
+ /*
+ * If we can't find a idle VMID to use, wait on a fence from the least
+ * recently used in the hope that it will be available soon.
+ */
+ *idle = NULL;
+ dma_fence_put(ring->vmid_wait);
+ ring->vmid_wait = dma_fence_get(*fence);
+ /* This is the reference we return */
+ dma_fence_get(*fence);
return 0;
}
@@ -313,7 +289,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
* user of the VMID.
*/
r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished,
- GFP_NOWAIT);
+ GFP_ATOMIC);
if (r)
return r;
@@ -373,7 +349,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
*/
r = amdgpu_sync_fence(&(*id)->active,
&job->base.s_fence->finished,
- GFP_NOWAIT);
+ GFP_ATOMIC);
if (r)
return r;
@@ -426,7 +402,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
/* Remember this submission as user of the VMID */
r = amdgpu_sync_fence(&id->active,
&job->base.s_fence->finished,
- GFP_NOWAIT);
+ GFP_ATOMIC);
if (r)
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index d020a890a0ea..0a0dcbf0798d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -130,14 +130,12 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
}
/* attempt a per ring reset */
- if (unlikely(adev->debug_disable_gpu_ring_reset)) {
- dev_err(adev->dev, "Ring reset disabled by debug mask\n");
- } else if (amdgpu_gpu_recovery &&
- amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
- ring->funcs->reset) {
+ if (amdgpu_gpu_recovery &&
+ amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
+ ring->funcs->reset) {
dev_err(adev->dev, "Starting %s ring reset\n",
s_job->sched->name);
- r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence);
+ r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
if (!r) {
atomic_inc(&ring->adev->gpu_reset_counter);
dev_err(adev->dev, "Ring %s reset succeeded\n",
@@ -186,6 +184,9 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned int num_ibs, struct amdgpu_job **job,
u64 drm_client_id)
{
+ struct amdgpu_fence *af;
+ int r;
+
if (num_ibs == 0)
return -EINVAL;
@@ -193,6 +194,20 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (!*job)
return -ENOMEM;
+ af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ if (!af) {
+ r = -ENOMEM;
+ goto err_job;
+ }
+ (*job)->hw_fence = af;
+
+ af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ if (!af) {
+ r = -ENOMEM;
+ goto err_fence;
+ }
+ (*job)->hw_vm_fence = af;
+
(*job)->vm = vm;
amdgpu_sync_create(&(*job)->explicit_sync);
@@ -204,6 +219,14 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return drm_sched_job_init(&(*job)->base, entity, 1, owner,
drm_client_id);
+
+err_fence:
+ kfree((*job)->hw_fence);
+err_job:
+ kfree(*job);
+ *job = NULL;
+
+ return r;
}
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
@@ -223,7 +246,10 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
if (r) {
if (entity)
drm_sched_job_cleanup(&(*job)->base);
+ kfree((*job)->hw_vm_fence);
+ kfree((*job)->hw_fence);
kfree(*job);
+ *job = NULL;
}
return r;
@@ -251,11 +277,11 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
struct dma_fence *f;
unsigned i;
- /* Check if any fences where initialized */
+ /* Check if any fences were initialized */
if (job->base.s_fence && job->base.s_fence->finished.ops)
f = &job->base.s_fence->finished;
- else if (job->hw_fence.base.ops)
- f = &job->hw_fence.base;
+ else if (job->hw_fence && job->hw_fence->base.ops)
+ f = &job->hw_fence->base;
else
f = NULL;
@@ -271,11 +297,16 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
amdgpu_sync_free(&job->explicit_sync);
- /* only put the hw fence if has embedded fence */
- if (!job->hw_fence.base.ops)
- kfree(job);
+ if (job->hw_fence->base.ops)
+ dma_fence_put(&job->hw_fence->base);
+ else
+ kfree(job->hw_fence);
+ if (job->hw_vm_fence->base.ops)
+ dma_fence_put(&job->hw_vm_fence->base);
else
- dma_fence_put(&job->hw_fence.base);
+ kfree(job->hw_vm_fence);
+
+ kfree(job);
}
void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
@@ -304,10 +335,16 @@ void amdgpu_job_free(struct amdgpu_job *job)
if (job->gang_submit != &job->base.s_fence->scheduled)
dma_fence_put(job->gang_submit);
- if (!job->hw_fence.base.ops)
- kfree(job);
+ if (job->hw_fence->base.ops)
+ dma_fence_put(&job->hw_fence->base);
+ else
+ kfree(job->hw_fence);
+ if (job->hw_vm_fence->base.ops)
+ dma_fence_put(&job->hw_vm_fence->base);
else
- dma_fence_put(&job->hw_fence.base);
+ kfree(job->hw_vm_fence);
+
+ kfree(job);
}
struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 4a6487eb6cb5..7abf069d17d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -64,7 +64,8 @@ struct amdgpu_job {
struct drm_sched_job base;
struct amdgpu_vm *vm;
struct amdgpu_sync explicit_sync;
- struct amdgpu_fence hw_fence;
+ struct amdgpu_fence *hw_fence;
+ struct amdgpu_fence *hw_vm_fence;
struct dma_fence *gang_submit;
uint32_t preamble_status;
uint32_t preemption_status;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b3e6b3fcdf2c..6ee77f431d56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1471,7 +1471,6 @@ error_pasid:
kfree(fpriv);
out_suspend:
- pm_runtime_mark_last_busy(dev->dev);
pm_put:
pm_runtime_put_autosuspend(dev->dev);
@@ -1539,7 +1538,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
kfree(fpriv);
file_priv->driver_priv = NULL;
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 4883adcfbb4b..9c182ce501af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -105,8 +105,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
spin_lock_init(&adev->mes.ring_lock[i]);
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
- adev->mes.vmid_mask_mmhub = 0xffffff00;
- adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xfffffffe : 0xffffff00;
+ adev->mes.vmid_mask_mmhub = 0xFF00;
+ adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xFFFE : 0xFF00;
num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me;
if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES)
@@ -528,6 +528,18 @@ error:
return r;
}
+int amdgpu_mes_hdp_flush(struct amdgpu_device *adev)
+{
+ uint32_t hdp_flush_req_offset, hdp_flush_done_offset, ref_and_mask;
+
+ hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev);
+ hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev);
+ ref_and_mask = adev->nbio.hdp_flush_reg->ref_and_mask_cp0;
+
+ return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset,
+ ref_and_mask, ref_and_mask);
+}
+
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
uint64_t process_context_addr,
uint32_t spi_gdbg_per_vmid_cntl,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 97c137c90f97..e989225b354b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -239,6 +239,7 @@ struct mes_add_queue_input {
struct mes_remove_queue_input {
uint32_t doorbell_offset;
uint64_t gang_context_addr;
+ bool remove_queue_after_reset;
};
struct mes_map_legacy_queue_input {
@@ -428,6 +429,7 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev,
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask);
+int amdgpu_mes_hdp_flush(struct amdgpu_device *adev);
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
uint64_t process_context_addr,
uint32_t spi_gdbg_per_vmid_cntl,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 20460cfd09bc..dc8d2f52c7d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -326,6 +326,8 @@ struct amdgpu_mode_info {
struct drm_property *audio_property;
/* FMT dithering */
struct drm_property *dither_property;
+ /* Adaptive Backlight Modulation (power feature) */
+ struct drm_property *abm_level_property;
/* hardcoded DFP edid from BIOS */
const struct drm_edid *bios_hardcoded_edid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 656b8a931dae..52c2d1731aab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -96,6 +96,7 @@ struct amdgpu_bo_va {
* if non-zero, cannot unmap from GPU because user queues may still access it
*/
unsigned int queue_refcount;
+ atomic_t userq_va_mapped;
};
struct amdgpu_bo {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index aa7987d0806c..0b10497d487c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1539,6 +1539,7 @@ static void psp_xgmi_reflect_topology_info(struct psp_context *psp,
uint64_t src_node_id = psp->adev->gmc.xgmi.node_id;
uint64_t dst_node_id = node_info.node_id;
uint8_t dst_num_hops = node_info.num_hops;
+ uint8_t dst_is_sharing_enabled = node_info.is_sharing_enabled;
uint8_t dst_num_links = node_info.num_links;
hive = amdgpu_get_xgmi_hive(psp->adev);
@@ -1558,13 +1559,20 @@ static void psp_xgmi_reflect_topology_info(struct psp_context *psp,
continue;
mirror_top_info->nodes[j].num_hops = dst_num_hops;
- /*
- * prevent 0 num_links value re-reflection since reflection
+ mirror_top_info->nodes[j].is_sharing_enabled = dst_is_sharing_enabled;
+ /* prevent 0 num_links value re-reflection since reflection
* criteria is based on num_hops (direct or indirect).
- *
*/
- if (dst_num_links)
+ if (dst_num_links) {
mirror_top_info->nodes[j].num_links = dst_num_links;
+ /* swap src and dst due to frame of reference */
+ for (int k = 0; k < dst_num_links; k++) {
+ mirror_top_info->nodes[j].port_num[k].src_xgmi_port_num =
+ node_info.port_num[k].dst_xgmi_port_num;
+ mirror_top_info->nodes[j].port_num[k].dst_xgmi_port_num =
+ node_info.port_num[k].src_xgmi_port_num;
+ }
+ }
break;
}
@@ -1639,9 +1647,10 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
IP_VERSION(13, 0, 6) ||
amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
- IP_VERSION(13, 0, 14);
- bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? 0 :
- psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG;
+ IP_VERSION(13, 0, 14) ||
+ amdgpu_sriov_vf(psp->adev);
+ bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG ||
+ amdgpu_sriov_xgmi_ta_ext_peer_link_en(psp->adev);
/* popluate the shared output buffer rather than the cmd input buffer
* with node_ids as the input for GET_PEER_LINKS command execution.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
index 123bcf5c2bb1..bacf888735db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
@@ -101,7 +101,6 @@ static ssize_t amdgpu_rap_debugfs_write(struct file *f, const char __user *buf,
}
amdgpu_gfx_off_ctrl(adev, true);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e0ee21150860..2a6cf7963dde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -41,6 +41,7 @@
#include "atom.h"
#include "amdgpu_reset.h"
#include "amdgpu_psp.h"
+#include "amdgpu_ras_mgr.h"
#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>
@@ -149,6 +150,8 @@ static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev);
#ifdef CONFIG_X86_MCE_AMD
static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
+static void
+amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev);
struct mce_notifier_adev_list {
struct amdgpu_device *devs[MAX_GPU_INSTANCE];
int num_gpu;
@@ -611,6 +614,8 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
return size;
}
+static int amdgpu_uniras_clear_badpages_info(struct amdgpu_device *adev);
+
/**
* DOC: AMDGPU RAS debugfs EEPROM table reset interface
*
@@ -635,6 +640,11 @@ static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f,
(struct amdgpu_device *)file_inode(f)->i_private;
int ret;
+ if (amdgpu_uniras_enabled(adev)) {
+ ret = amdgpu_uniras_clear_badpages_info(adev);
+ return ret ? ret : size;
+ }
+
ret = amdgpu_ras_eeprom_reset_table(
&(amdgpu_ras_get_context(adev)->eeprom_control));
@@ -1542,9 +1552,51 @@ out_fini_err_data:
return ret;
}
+static int amdgpu_uniras_clear_badpages_info(struct amdgpu_device *adev)
+{
+ struct ras_cmd_dev_handle req = {0};
+ int ret;
+
+ ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__CLEAR_BAD_PAGE_INFO,
+ &req, sizeof(req), NULL, 0);
+ if (ret) {
+ dev_err(adev->dev, "Failed to clear bad pages info, ret: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int amdgpu_uniras_query_block_ecc(struct amdgpu_device *adev,
+ struct ras_query_if *info)
+{
+ struct ras_cmd_block_ecc_info_req req = {0};
+ struct ras_cmd_block_ecc_info_rsp rsp = {0};
+ int ret;
+
+ if (!info)
+ return -EINVAL;
+
+ req.block_id = info->head.block;
+ req.subblock_id = info->head.sub_block_index;
+
+ ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__GET_BLOCK_ECC_STATUS,
+ &req, sizeof(req), &rsp, sizeof(rsp));
+ if (!ret) {
+ info->ce_count = rsp.ce_count;
+ info->ue_count = rsp.ue_count;
+ info->de_count = rsp.de_count;
+ }
+
+ return ret;
+}
+
int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info)
{
- return amdgpu_ras_query_error_status_with_event(adev, info, RAS_EVENT_TYPE_INVALID);
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_uniras_query_block_ecc(adev, info);
+ else
+ return amdgpu_ras_query_error_status_with_event(adev, info, RAS_EVENT_TYPE_INVALID);
}
int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
@@ -1596,6 +1648,27 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
return 0;
}
+static int amdgpu_uniras_error_inject(struct amdgpu_device *adev,
+ struct ras_inject_if *info)
+{
+ struct ras_cmd_inject_error_req inject_req;
+ struct ras_cmd_inject_error_rsp rsp;
+
+ if (!info)
+ return -EINVAL;
+
+ memset(&inject_req, 0, sizeof(inject_req));
+ inject_req.block_id = info->head.block;
+ inject_req.subblock_id = info->head.sub_block_index;
+ inject_req.address = info->address;
+ inject_req.error_type = info->head.type;
+ inject_req.instance_mask = info->instance_mask;
+ inject_req.method = info->value;
+
+ return amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__INJECT_ERROR,
+ &inject_req, sizeof(inject_req), &rsp, sizeof(rsp));
+}
+
/* wrapper of psp_ras_trigger_error */
int amdgpu_ras_error_inject(struct amdgpu_device *adev,
struct ras_inject_if *info)
@@ -1613,6 +1686,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
info->head.block,
info->head.sub_block_index);
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_uniras_error_inject(adev, info);
+
/* inject on guest isn't allowed, return success directly */
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1757,7 +1833,9 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
/* sysfs begin */
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
- struct ras_badpage **bps, unsigned int *count);
+ struct ras_badpage *bps, uint32_t count, uint32_t start);
+static int amdgpu_uniras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage *bps, uint32_t count, uint32_t start);
static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
{
@@ -1815,19 +1893,50 @@ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
unsigned int end = div64_ul(ppos + count - 1, element_size);
ssize_t s = 0;
struct ras_badpage *bps = NULL;
- unsigned int bps_count = 0;
+ int bps_count = 0, i, status;
+ uint64_t address;
memset(buf, 0, count);
- if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
+ bps_count = end - start;
+ bps = kmalloc_array(bps_count, sizeof(*bps), GFP_KERNEL);
+ if (!bps)
+ return 0;
+
+ memset(bps, 0, sizeof(*bps) * bps_count);
+
+ if (amdgpu_uniras_enabled(adev))
+ bps_count = amdgpu_uniras_badpages_read(adev, bps, bps_count, start);
+ else
+ bps_count = amdgpu_ras_badpages_read(adev, bps, bps_count, start);
+
+ if (bps_count <= 0) {
+ kfree(bps);
return 0;
+ }
+
+ for (i = 0; i < bps_count; i++) {
+ address = ((uint64_t)bps[i].bp) << AMDGPU_GPU_PAGE_SHIFT;
+ if (amdgpu_ras_check_critical_address(adev, address))
+ continue;
+
+ bps[i].size = AMDGPU_GPU_PAGE_SIZE;
+
+ status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
+ address);
+ if (status == -EBUSY)
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
+ else if (status == -ENOENT)
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
+ else
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED;
- for (; start < end && start < bps_count; start++)
s += scnprintf(&buf[s], element_size + 1,
"0x%08x : 0x%08x : %1s\n",
- bps[start].bp,
- bps[start].size,
- amdgpu_ras_badpage_flags_str(bps[start].flags));
+ bps[i].bp,
+ bps[i].size,
+ amdgpu_ras_badpage_flags_str(bps[i].flags));
+ }
kfree(bps);
@@ -1843,12 +1952,42 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
}
+static bool amdgpu_ras_get_version_info(struct amdgpu_device *adev, u32 *major,
+ u32 *minor, u32 *rev)
+{
+ int i;
+
+ if (!adev || !major || !minor || !rev || !amdgpu_uniras_enabled(adev))
+ return false;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_RAS) {
+ *major = adev->ip_blocks[i].version->major;
+ *minor = adev->ip_blocks[i].version->minor;
+ *rev = adev->ip_blocks[i].version->rev;
+ return true;
+ }
+ }
+
+ return false;
+}
+
static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct amdgpu_ras *con =
container_of(attr, struct amdgpu_ras, version_attr);
- return sysfs_emit(buf, "table version: 0x%x\n", con->eeprom_control.tbl_hdr.version);
+ u32 major, minor, rev;
+ ssize_t size = 0;
+
+ size += sysfs_emit_at(buf, size, "table version: 0x%x\n",
+ con->eeprom_control.tbl_hdr.version);
+
+ if (amdgpu_ras_get_version_info(con->adev, &major, &minor, &rev))
+ size += sysfs_emit_at(buf, size, "ras version: %u.%u.%u\n",
+ major, minor, rev);
+
+ return size;
}
static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev,
@@ -2241,6 +2380,11 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY))
return;
+ if (amdgpu_uniras_enabled(adev)) {
+ amdgpu_ras_mgr_handle_fatal_interrupt(adev, NULL);
+ return;
+ }
+
if (adev->nbio.ras &&
adev->nbio.ras->handle_ras_controller_intr_no_bifring)
adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
@@ -2411,6 +2555,16 @@ int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
struct ras_manager *obj;
struct ras_ih_data *data;
+ if (amdgpu_uniras_enabled(adev)) {
+ struct ras_ih_info ih_info;
+
+ memset(&ih_info, 0, sizeof(ih_info));
+ ih_info.block = info->head.block;
+ memcpy(&ih_info.iv_entry, info->entry, sizeof(struct amdgpu_iv_entry));
+
+ return amdgpu_ras_mgr_handle_controller_interrupt(adev, &ih_info);
+ }
+
obj = amdgpu_ras_find_obj(adev, &info->head);
if (!obj)
return -EINVAL;
@@ -2605,62 +2759,83 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
}
}
-/* recovery begin */
-
-/* return 0 on success.
- * caller need free bps.
- */
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
- struct ras_badpage **bps, unsigned int *count)
+ struct ras_badpage *bps, uint32_t count, uint32_t start)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
- int i = 0;
- int ret = 0, status;
+ int r = 0;
+ uint32_t i;
if (!con || !con->eh_data || !bps || !count)
return -EINVAL;
mutex_lock(&con->recovery_lock);
data = con->eh_data;
- if (!data || data->count == 0) {
- *bps = NULL;
- ret = -EINVAL;
- goto out;
+ if (start < data->count) {
+ for (i = start; i < data->count; i++) {
+ if (!data->bps[i].ts)
+ continue;
+
+ bps[r].bp = data->bps[i].retired_page;
+ r++;
+ if (r >= count)
+ break;
+ }
}
+ mutex_unlock(&con->recovery_lock);
- *bps = kmalloc_array(data->count, sizeof(struct ras_badpage), GFP_KERNEL);
- if (!*bps) {
- ret = -ENOMEM;
- goto out;
- }
+ return r;
+}
- for (; i < data->count; i++) {
- if (!data->bps[i].ts)
- continue;
+static int amdgpu_uniras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage *bps, uint32_t count, uint32_t start)
+{
+ struct ras_cmd_bad_pages_info_req cmd_input;
+ struct ras_cmd_bad_pages_info_rsp *output;
+ uint32_t group, start_group, end_group;
+ uint32_t pos, pos_in_group;
+ int r = 0, i;
- (*bps)[i] = (struct ras_badpage){
- .bp = data->bps[i].retired_page,
- .size = AMDGPU_GPU_PAGE_SIZE,
- .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
- };
+ if (!bps || !count)
+ return -EINVAL;
- if (amdgpu_ras_check_critical_address(adev,
- data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
- continue;
+ output = kmalloc(sizeof(*output), GFP_KERNEL);
+ if (!output)
+ return -ENOMEM;
- status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
- data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT);
- if (status == -EBUSY)
- (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
- else if (status == -ENOENT)
- (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
+ memset(&cmd_input, 0, sizeof(cmd_input));
+
+ start_group = start / RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+ end_group = (start + count + RAS_CMD_MAX_BAD_PAGES_PER_GROUP - 1) /
+ RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+
+ pos = start;
+ for (group = start_group; group < end_group; group++) {
+ memset(output, 0, sizeof(*output));
+ cmd_input.group_index = group;
+ if (amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__GET_BAD_PAGES,
+ &cmd_input, sizeof(cmd_input), output, sizeof(*output)))
+ goto out;
+
+ if (pos >= output->bp_total_cnt)
+ goto out;
+
+ pos_in_group = pos - group * RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+ for (i = pos_in_group; i < output->bp_in_group; i++, pos++) {
+ if (!output->records[i].ts)
+ continue;
+
+ bps[r].bp = output->records[i].retired_page;
+ r++;
+ if (r >= count)
+ goto out;
+ }
}
- *count = con->bad_page_num;
out:
- mutex_unlock(&con->recovery_lock);
- return ret;
+ kfree(output);
+ return r;
}
static void amdgpu_ras_set_fed_all(struct amdgpu_device *adev,
@@ -2748,8 +2923,12 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
type = amdgpu_ras_get_fatal_error_event(adev);
list_for_each_entry(remote_adev,
device_list_handle, gmc.xgmi.head) {
- amdgpu_ras_query_err_status(remote_adev);
- amdgpu_ras_log_on_err_counter(remote_adev, type);
+ if (amdgpu_uniras_enabled(remote_adev)) {
+ amdgpu_ras_mgr_update_ras_ecc(remote_adev);
+ } else {
+ amdgpu_ras_query_err_status(remote_adev);
+ amdgpu_ras_log_on_err_counter(remote_adev, type);
+ }
}
}
@@ -2837,8 +3016,13 @@ static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev,
addr_in.ma.err_addr = bps->address;
addr_in.ma.socket_id = socket;
addr_in.ma.ch_inst = bps->mem_channel;
- /* tell RAS TA the node instance is not used */
- addr_in.ma.node_inst = TA_RAS_INV_NODE;
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ /* tell RAS TA the node instance is not used */
+ addr_in.ma.node_inst = TA_RAS_INV_NODE;
+ } else {
+ addr_in.ma.umc_inst = bps->mcumc_id;
+ addr_in.ma.node_inst = bps->cu;
+ }
if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
ret = adev->umc.ras->convert_ras_err_addr(adev, err_data,
@@ -2981,8 +3165,16 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
int i = 0;
enum amdgpu_memory_partition save_nps;
- save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
- bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+ bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+ } else {
+ /* if pmfw manages eeprom, save_nps is not stored on eeprom,
+ * we should always convert mca address into physical address,
+ * make save_nps different from nps
+ */
+ save_nps = nps + 1;
+ }
if (save_nps == nps) {
if (amdgpu_umc_pages_in_a_row(adev, err_data,
@@ -3048,7 +3240,8 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
if (from_rom) {
/* there is no pa recs in V3, so skip pa recs processing */
- if (control->tbl_hdr.version < RAS_TABLE_VER_V3) {
+ if ((control->tbl_hdr.version < RAS_TABLE_VER_V3) &&
+ !amdgpu_ras_smu_eeprom_supported(adev)) {
for (i = 0; i < pages; i++) {
if (control->ras_num_recs - i >= adev->umc.retire_unit) {
if ((bps[i].address == bps[i + 1].address) &&
@@ -3118,7 +3311,13 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
mutex_lock(&con->recovery_lock);
control = &con->eeprom_control;
data = con->eh_data;
- unit_num = data->count / adev->umc.retire_unit - control->ras_num_recs;
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ unit_num = control->ras_num_recs -
+ control->ras_num_recs_old;
+ else
+ unit_num = data->count / adev->umc.retire_unit -
+ control->ras_num_recs;
+
save_count = con->bad_page_num - control->ras_num_bad_pages;
mutex_unlock(&con->recovery_lock);
@@ -3126,7 +3325,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
*new_cnt = unit_num;
/* only new entries are saved */
- if (unit_num > 0) {
+ if (unit_num && save_count) {
/*old asics only save pa to eeprom like before*/
if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
if (amdgpu_ras_eeprom_append(control,
@@ -3179,7 +3378,8 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
/*In V3, there is no pa recs, and some cases(when address==0) may be parsed
as pa recs, so add verion check to avoid it.
*/
- if (control->tbl_hdr.version < RAS_TABLE_VER_V3) {
+ if ((control->tbl_hdr.version < RAS_TABLE_VER_V3) &&
+ !amdgpu_ras_smu_eeprom_supported(adev)) {
for (i = 0; i < control->ras_num_recs; i++) {
if ((control->ras_num_recs - i) >= adev->umc.retire_unit) {
if ((bps[i].address == bps[i + 1].address) &&
@@ -3590,7 +3790,12 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
if (!con || amdgpu_sriov_vf(adev))
return 0;
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
+
control = &con->eeprom_control;
+ con->ras_smu_drv = amdgpu_dpm_get_ras_smu_driver(adev);
+
ret = amdgpu_ras_eeprom_init(control);
control->is_eeprom_valid = !ret;
@@ -3751,7 +3956,9 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
mutex_unlock(&con->recovery_lock);
amdgpu_ras_critical_region_init(adev);
-
+#ifdef CONFIG_X86_MCE_AMD
+ amdgpu_unregister_bad_pages_mca_notifier(adev);
+#endif
return 0;
}
/* recovery end */
@@ -3975,7 +4182,6 @@ static void amdgpu_ras_counte_dw(struct work_struct *work)
atomic_set(&con->ras_ue_count, ue_count);
}
- pm_runtime_mark_last_busy(dev->dev);
Out:
pm_runtime_put_autosuspend(dev->dev);
}
@@ -4584,6 +4790,9 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_
struct ras_event_state *event_state;
int ret = 0;
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
+
if (type >= RAS_EVENT_TYPE_COUNT) {
ret = -EINVAL;
goto out;
@@ -4634,20 +4843,18 @@ u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type
return id;
}
-void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
+int amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
{
if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
enum ras_event_type type = RAS_EVENT_TYPE_FATAL;
- u64 event_id;
+ u64 event_id = RAS_EVENT_INVALID_ID;
- if (amdgpu_ras_mark_ras_event(adev, type)) {
- dev_err(adev->dev,
- "uncorrectable hardware error (ERREVENT_ATHUB_INTERRUPT) detected!\n");
- return;
- }
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
- event_id = amdgpu_ras_acquire_event_id(adev, type);
+ if (!amdgpu_ras_mark_ras_event(adev, type))
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error"
"(ERREVENT_ATHUB_INTERRUPT) detected!\n");
@@ -4656,6 +4863,8 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
amdgpu_ras_reset_gpu(adev);
}
+
+ return -EBUSY;
}
bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
@@ -4783,6 +4992,28 @@ static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev)
notifier_registered = true;
}
}
+static void amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ if (!notifier_registered && !mce_adev_list.num_gpu)
+ return;
+ for (i = 0, j = 0; i < mce_adev_list.num_gpu; i++) {
+ if (mce_adev_list.devs[i] == adev)
+ mce_adev_list.devs[i] = NULL;
+ if (!mce_adev_list.devs[i])
+ ++j;
+ }
+
+ if (j == mce_adev_list.num_gpu) {
+ mce_adev_list.num_gpu = 0;
+ /* Unregister x86 notifier with MCE subsystem. */
+ if (notifier_registered) {
+ mce_unregister_decode_chain(&amdgpu_bad_page_nb);
+ notifier_registered = false;
+ }
+ }
+}
#endif
struct amdgpu_ras *amdgpu_ras_get_context(struct amdgpu_device *adev)
@@ -5408,6 +5639,9 @@ bool amdgpu_ras_is_rma(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_ras_mgr_is_rma(adev);
+
if (!con)
return false;
@@ -5490,3 +5724,25 @@ bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr
return ret;
}
+
+void amdgpu_ras_pre_reset(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ if (amdgpu_uniras_enabled(tmp_adev))
+ amdgpu_ras_mgr_pre_reset(tmp_adev);
+ }
+}
+
+void amdgpu_ras_post_reset(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ if (amdgpu_uniras_enabled(tmp_adev))
+ amdgpu_ras_mgr_post_reset(tmp_adev);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 6cf0dfd38be8..ff44190d7d98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -503,7 +503,34 @@ struct ras_critical_region {
uint64_t size;
};
+struct ras_eeprom_table_version {
+ uint32_t minor : 16;
+ uint32_t major : 16;
+};
+
+struct ras_eeprom_smu_funcs {
+ int (*get_ras_table_version)(struct amdgpu_device *adev,
+ uint32_t *table_version);
+ int (*get_badpage_count)(struct amdgpu_device *adev, uint32_t *count, uint32_t timeout);
+ int (*get_badpage_mca_addr)(struct amdgpu_device *adev, uint16_t index, uint64_t *mca_addr);
+ int (*set_timestamp)(struct amdgpu_device *adev, uint64_t timestamp);
+ int (*get_timestamp)(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *timestamp);
+ int (*get_badpage_ipid)(struct amdgpu_device *adev, uint16_t index, uint64_t *ipid);
+ int (*erase_ras_table)(struct amdgpu_device *adev, uint32_t *result);
+};
+
+enum ras_smu_feature_flags {
+ RAS_SMU_FEATURE_BIT__RAS_EEPROM = BIT_ULL(0),
+};
+
+struct ras_smu_drv {
+ const struct ras_eeprom_smu_funcs *smu_eeprom_funcs;
+ void (*ras_smu_feature_flags)(struct amdgpu_device *adev, uint64_t *flags);
+};
+
struct amdgpu_ras {
+ void *ras_mgr;
/* ras infrastructure */
/* for ras itself. */
uint32_t features;
@@ -590,6 +617,10 @@ struct amdgpu_ras {
/* Protect poison injection */
struct mutex poison_lock;
+
+ /* Disable/Enable uniras switch */
+ bool uniras_enabled;
+ const struct ras_smu_drv *ras_smu_drv;
};
struct ras_fs_data {
@@ -909,7 +940,7 @@ static inline void amdgpu_ras_intr_cleared(void)
atomic_set(&amdgpu_ras_in_intr, 0);
}
-void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
+int amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
@@ -1008,4 +1039,9 @@ void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
const char *fmt, ...);
bool amdgpu_ras_is_rma(struct amdgpu_device *adev);
+
+void amdgpu_ras_pre_reset(struct amdgpu_device *adev,
+ struct list_head *device_list);
+void amdgpu_ras_post_reset(struct amdgpu_device *adev,
+ struct list_head *device_list);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 3eb3fb55ccb0..64dd7a81bff5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -32,6 +32,7 @@
#include <linux/uaccess.h>
#include "amdgpu_reset.h"
+#include "amdgpu_ras_mgr.h"
/* These are memory addresses as would be seen by one or more EEPROM
* chips strung on the I2C bus, usually by manipulating pins 1-3 of a
@@ -123,6 +124,8 @@
RAS_TABLE_V2_1_INFO_SIZE) \
/ RAS_TABLE_RECORD_SIZE)
+#define RAS_SMU_MESSAGE_TIMEOUT_MS 1000 /* 1s */
+
/* Given a zero-based index of an EEPROM RAS record, yields the EEPROM
* offset off of RAS_TABLE_START. That is, this is something you can
* add to control->i2c_address, and then tell I2C layer to read
@@ -443,40 +446,57 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ u32 erase_res = 0;
u8 csum;
int res;
mutex_lock(&control->ras_tbl_mutex);
- hdr->header = RAS_TABLE_HDR_VAL;
- amdgpu_ras_set_eeprom_table_version(control);
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ hdr->header = RAS_TABLE_HDR_VAL;
+ amdgpu_ras_set_eeprom_table_version(control);
- if (hdr->version >= RAS_TABLE_VER_V2_1) {
- hdr->first_rec_offset = RAS_RECORD_START_V2_1;
- hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
- RAS_TABLE_V2_1_INFO_SIZE;
- rai->rma_status = GPU_HEALTH_USABLE;
- /**
- * GPU health represented as a percentage.
- * 0 means worst health, 100 means fully health.
- */
- rai->health_percent = 100;
- /* ecc_page_threshold = 0 means disable bad page retirement */
- rai->ecc_page_threshold = con->bad_page_cnt_threshold;
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
+ hdr->first_rec_offset = RAS_RECORD_START_V2_1;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE;
+ rai->rma_status = GPU_HEALTH_USABLE;
+
+ control->ras_record_offset = RAS_RECORD_START_V2_1;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
+ /**
+ * GPU health represented as a percentage.
+ * 0 means worst health, 100 means fully health.
+ */
+ rai->health_percent = 100;
+ /* ecc_page_threshold = 0 means disable bad page retirement */
+ rai->ecc_page_threshold = con->bad_page_cnt_threshold;
+ } else {
+ hdr->first_rec_offset = RAS_RECORD_START;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
+
+ control->ras_record_offset = RAS_RECORD_START;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+ }
+
+ csum = __calc_hdr_byte_sum(control);
+ if (hdr->version >= RAS_TABLE_VER_V2_1)
+ csum += __calc_ras_info_byte_sum(control);
+ csum = -csum;
+ hdr->checksum = csum;
+ res = __write_table_header(control);
+ if (!res && hdr->version > RAS_TABLE_VER_V1)
+ res = __write_table_ras_info(control);
} else {
- hdr->first_rec_offset = RAS_RECORD_START;
- hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
+ res = amdgpu_ras_smu_erase_ras_table(adev, &erase_res);
+ if (res || erase_res) {
+ dev_warn(adev->dev, "RAS EEPROM reset failed, res:%d result:%d",
+ res, erase_res);
+ if (!res)
+ res = -EIO;
+ }
}
- csum = __calc_hdr_byte_sum(control);
- if (hdr->version >= RAS_TABLE_VER_V2_1)
- csum += __calc_ras_info_byte_sum(control);
- csum = -csum;
- hdr->checksum = csum;
- res = __write_table_header(control);
- if (!res && hdr->version > RAS_TABLE_VER_V1)
- res = __write_table_ras_info(control);
-
control->ras_num_recs = 0;
control->ras_num_bad_pages = 0;
control->ras_num_mca_recs = 0;
@@ -556,6 +576,9 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_ras_mgr_check_eeprom_safety_watermark(adev);
+
if (!__is_ras_eeprom_supported(adev) ||
!amdgpu_bad_page_threshold)
return false;
@@ -766,7 +789,8 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
"Saved bad pages %d reaches threshold value %d\n",
control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
- if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev))
+ if (adev->cper.enabled && !amdgpu_uniras_enabled(adev) &&
+ amdgpu_cper_generate_bp_threshold_record(adev))
dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
if ((amdgpu_bad_page_threshold != -1) &&
@@ -849,6 +873,71 @@ Out:
return res;
}
+int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ int ret, retry = 20;
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return 0;
+
+ control->ras_num_recs_old = control->ras_num_recs;
+
+ do {
+ /* 1000ms timeout is long enough, smu_get_badpage_count won't
+ * return -EBUSY before timeout.
+ */
+ ret = amdgpu_ras_smu_get_badpage_count(adev,
+ &(control->ras_num_recs), RAS_SMU_MESSAGE_TIMEOUT_MS);
+ if (!ret &&
+ (control->ras_num_recs_old == control->ras_num_recs)) {
+ /* record number update in PMFW needs some time,
+ * smu_get_badpage_count may return immediately without
+ * count update, sleep for a while and retry again.
+ */
+ msleep(50);
+ retry--;
+ } else {
+ break;
+ }
+ } while (retry);
+
+ /* no update of record number is not a real failure,
+ * don't print warning here
+ */
+ if (!ret && (control->ras_num_recs_old == control->ras_num_recs))
+ ret = -EINVAL;
+
+ return ret;
+}
+
+static int amdgpu_ras_smu_eeprom_append(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev) || !con)
+ return 0;
+
+ control->ras_num_bad_pages = con->bad_page_num;
+
+ if (amdgpu_bad_page_threshold != 0 &&
+ control->ras_num_bad_pages > con->bad_page_cnt_threshold) {
+ dev_warn(adev->dev,
+ "Saved bad pages %d reaches threshold value %d\n",
+ control->ras_num_bad_pages, con->bad_page_cnt_threshold);
+
+ if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev))
+ dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
+
+ if ((amdgpu_bad_page_threshold != -1) &&
+ (amdgpu_bad_page_threshold != -2))
+ con->is_rma = true;
+ }
+
+ return 0;
+}
+
/**
* amdgpu_ras_eeprom_append -- append records to the EEPROM RAS table
* @control: pointer to control structure
@@ -873,6 +962,9 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
if (!__is_ras_eeprom_supported(adev))
return 0;
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_smu_eeprom_append(control);
+
if (num == 0) {
dev_err(adev->dev, "will not append 0 records\n");
return -EINVAL;
@@ -948,6 +1040,50 @@ static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
return res;
}
+int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record, u32 rec_idx,
+ const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ uint64_t ts, end_idx;
+ int i, ret;
+ u64 mca, ipid;
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return 0;
+
+ if (!adev->umc.ras || !adev->umc.ras->mca_ipid_parse)
+ return -EOPNOTSUPP;
+
+ end_idx = rec_idx + num;
+ for (i = rec_idx; i < end_idx; i++) {
+ ret = amdgpu_ras_smu_get_badpage_mca_addr(adev, i, &mca);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_ras_smu_get_badpage_ipid(adev, i, &ipid);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_ras_smu_get_timestamp(adev, i, &ts);
+ if (ret)
+ return ret;
+
+ record[i - rec_idx].address = mca;
+ /* retired_page (pa) is unused now */
+ record[i - rec_idx].retired_page = 0x1ULL;
+ record[i - rec_idx].ts = ts;
+ record[i - rec_idx].err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+
+ adev->umc.ras->mca_ipid_parse(adev, ipid,
+ (uint32_t *)&(record[i - rec_idx].cu),
+ (uint32_t *)&(record[i - rec_idx].mem_channel),
+ (uint32_t *)&(record[i - rec_idx].mcumc_id), NULL);
+ }
+
+ return 0;
+}
+
/**
* amdgpu_ras_eeprom_read -- read EEPROM
* @control: pointer to control structure
@@ -969,6 +1105,9 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
u8 *buf, *pp;
u32 g0, g1;
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_eeprom_read_idx(control, record, 0, num);
+
if (!__is_ras_eeprom_supported(adev))
return 0;
@@ -1140,6 +1279,10 @@ static ssize_t amdgpu_ras_debugfs_table_read(struct file *f, char __user *buf,
int res = -EFAULT;
size_t data_len;
+ /* pmfw manages eeprom data by itself */
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return 0;
+
mutex_lock(&control->ras_tbl_mutex);
/* We want *pos - data_len > 0, which means there's
@@ -1370,6 +1513,42 @@ Out:
return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res;
}
+static int amdgpu_ras_smu_eeprom_init(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ uint64_t local_time;
+ int res;
+
+ ras->is_rma = false;
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+ mutex_init(&control->ras_tbl_mutex);
+
+ res = amdgpu_ras_smu_get_table_version(adev, &(hdr->version));
+ if (res)
+ return res;
+
+ res = amdgpu_ras_smu_get_badpage_count(adev,
+ &(control->ras_num_recs), 100);
+ if (res)
+ return res;
+
+ local_time = (uint64_t)ktime_get_real_seconds();
+ res = amdgpu_ras_smu_set_timestamp(adev, local_time);
+ if (res)
+ return res;
+
+ control->ras_max_record_count = 4000;
+
+ control->ras_num_mca_recs = 0;
+ control->ras_num_pa_recs = 0;
+
+ return 0;
+}
+
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
@@ -1378,6 +1557,9 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
int res;
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_smu_eeprom_init(control);
+
ras->is_rma = false;
if (!__is_ras_eeprom_supported(adev))
@@ -1444,6 +1626,47 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
return 0;
}
+static int amdgpu_ras_smu_eeprom_check(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ control->ras_num_bad_pages = ras->bad_page_num;
+
+ if ((ras->bad_page_cnt_threshold < control->ras_num_bad_pages) &&
+ amdgpu_bad_page_threshold != 0) {
+ dev_warn(adev->dev,
+ "RAS records:%d exceed threshold:%d\n",
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+ if ((amdgpu_bad_page_threshold == -1) ||
+ (amdgpu_bad_page_threshold == -2)) {
+ dev_warn(adev->dev,
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n");
+ } else {
+ ras->is_rma = true;
+ dev_warn(adev->dev,
+ "User defined threshold is set, runtime service will be halt when threshold is reached\n");
+ }
+
+ return 0;
+ }
+
+ dev_dbg(adev->dev,
+ "Found existing EEPROM table with %d records",
+ control->ras_num_bad_pages);
+
+ /* Warn if we are at 90% of the threshold or above
+ */
+ if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold)
+ dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
+ control->ras_num_bad_pages,
+ ras->bad_page_cnt_threshold);
+ return 0;
+}
+
int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
@@ -1451,6 +1674,9 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
int res = 0;
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_smu_eeprom_check(control);
+
if (!__is_ras_eeprom_supported(adev))
return 0;
@@ -1541,7 +1767,8 @@ void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev)
struct amdgpu_ras_eeprom_control *control;
int res;
- if (!__is_ras_eeprom_supported(adev) || !ras)
+ if (!__is_ras_eeprom_supported(adev) || !ras ||
+ amdgpu_ras_smu_eeprom_supported(adev))
return;
control = &ras->eeprom_control;
if (!control->is_eeprom_valid)
@@ -1561,4 +1788,143 @@ void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev)
control->is_eeprom_valid = false;
}
return;
-} \ No newline at end of file
+}
+
+static const struct ras_smu_drv *amdgpu_ras_get_smu_ras_drv(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (!ras)
+ return NULL;
+
+ return ras->ras_smu_drv;
+}
+
+static uint64_t amdgpu_ras_smu_get_feature_flags(struct amdgpu_device *adev)
+{
+ const struct ras_smu_drv *ras_smu_drv = amdgpu_ras_get_smu_ras_drv(adev);
+ uint64_t flags = 0ULL;
+
+ if (!ras_smu_drv)
+ goto out;
+
+ if (ras_smu_drv->ras_smu_feature_flags)
+ ras_smu_drv->ras_smu_feature_flags(adev, &flags);
+
+out:
+ return flags;
+}
+
+bool amdgpu_ras_smu_eeprom_supported(struct amdgpu_device *adev)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+ uint64_t flags = 0ULL;
+
+ if (!__is_ras_eeprom_supported(adev) || !smu_ras_drv)
+ return false;
+
+ if (!smu_ras_drv->smu_eeprom_funcs)
+ return false;
+
+ flags = amdgpu_ras_smu_get_feature_flags(adev);
+
+ return !!(flags & RAS_SMU_FEATURE_BIT__RAS_EEPROM);
+}
+
+int amdgpu_ras_smu_get_table_version(struct amdgpu_device *adev,
+ uint32_t *table_version)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->get_ras_table_version)
+ return smu_ras_drv->smu_eeprom_funcs->get_ras_table_version(adev,
+ table_version);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_get_badpage_count(struct amdgpu_device *adev,
+ uint32_t *count, uint32_t timeout)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->get_badpage_count)
+ return smu_ras_drv->smu_eeprom_funcs->get_badpage_count(adev,
+ count, timeout);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_get_badpage_mca_addr(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *mca_addr)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->get_badpage_mca_addr)
+ return smu_ras_drv->smu_eeprom_funcs->get_badpage_mca_addr(adev,
+ index, mca_addr);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_set_timestamp(struct amdgpu_device *adev,
+ uint64_t timestamp)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->set_timestamp)
+ return smu_ras_drv->smu_eeprom_funcs->set_timestamp(adev,
+ timestamp);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_get_timestamp(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *timestamp)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->get_timestamp)
+ return smu_ras_drv->smu_eeprom_funcs->get_timestamp(adev,
+ index, timestamp);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_get_badpage_ipid(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *ipid)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->get_badpage_ipid)
+ return smu_ras_drv->smu_eeprom_funcs->get_badpage_ipid(adev,
+ index, ipid);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_erase_ras_table(struct amdgpu_device *adev,
+ uint32_t *result)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->erase_ras_table)
+ return smu_ras_drv->smu_eeprom_funcs->erase_ras_table(adev,
+ result);
+ return -EOPNOTSUPP;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index ebfca4cb5688..2e5d63957e71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -82,6 +82,7 @@ struct amdgpu_ras_eeprom_control {
/* Number of records in the table.
*/
u32 ras_num_recs;
+ u32 ras_num_recs_old;
/* the bad page number is ras_num_recs or
* ras_num_recs * umc.retire_unit
@@ -163,6 +164,35 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control);
void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev);
+bool amdgpu_ras_smu_eeprom_supported(struct amdgpu_device *adev);
+
+int amdgpu_ras_smu_get_table_version(struct amdgpu_device *adev,
+ uint32_t *table_version);
+
+int amdgpu_ras_smu_get_badpage_count(struct amdgpu_device *adev,
+ uint32_t *count, uint32_t timeout);
+
+int amdgpu_ras_smu_get_badpage_mca_addr(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *mca_addr);
+
+int amdgpu_ras_smu_set_timestamp(struct amdgpu_device *adev,
+ uint64_t timestamp);
+
+int amdgpu_ras_smu_get_timestamp(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *timestamp);
+
+int amdgpu_ras_smu_get_badpage_ipid(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *ipid);
+
+int amdgpu_ras_smu_erase_ras_table(struct amdgpu_device *adev,
+ uint32_t *result);
+
+int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record, u32 rec_idx,
+ const u32 num);
+
+int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control);
+
extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 5ec5c3ff22bb..c596b6df2e2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -33,6 +33,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
+#include "amdgpu_ras_mgr.h"
#include "atom.h"
/*
@@ -159,8 +160,16 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
*/
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
- while (ib->length_dw & ring->funcs->align_mask)
- ib->ptr[ib->length_dw++] = ring->funcs->nop;
+ u32 align_mask = ring->funcs->align_mask;
+ u32 count = ib->length_dw & align_mask;
+
+ if (count) {
+ count = align_mask + 1 - count;
+
+ memset32(&ib->ptr[ib->length_dw], ring->funcs->nop, count);
+
+ ib->length_dw += count;
+ }
}
/**
@@ -460,9 +469,6 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
ktime_t deadline;
bool ret;
- if (unlikely(ring->adev->debug_disable_soft_recovery))
- return false;
-
deadline = ktime_add_us(ktime_get(), 10000);
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
@@ -490,6 +496,66 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
*/
#if defined(CONFIG_DEBUG_FS)
+static ssize_t amdgpu_ras_cper_debugfs_read(struct file *f, char __user *buf,
+ size_t size, loff_t *offset)
+{
+ const uint8_t ring_header_size = 12;
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+ struct ras_cmd_cper_snapshot_req *snapshot_req __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_snapshot_req), GFP_KERNEL);
+ struct ras_cmd_cper_snapshot_rsp *snapshot_rsp __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_snapshot_rsp), GFP_KERNEL);
+ struct ras_cmd_cper_record_req *record_req __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_record_req), GFP_KERNEL);
+ struct ras_cmd_cper_record_rsp *record_rsp __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_record_rsp), GFP_KERNEL);
+ uint8_t *ring_header __free(kfree) =
+ kzalloc(ring_header_size, GFP_KERNEL);
+ uint32_t total_cper_num;
+ uint64_t start_cper_id;
+ int r;
+
+ if (!snapshot_req || !snapshot_rsp || !record_req || !record_rsp ||
+ !ring_header)
+ return -ENOMEM;
+
+ if (!(*offset)) {
+ /* Need at least 12 bytes for the header on the first read */
+ if (size < ring_header_size)
+ return -EINVAL;
+
+ if (copy_to_user(buf, ring_header, ring_header_size))
+ return -EFAULT;
+ buf += ring_header_size;
+ size -= ring_header_size;
+ }
+
+ r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev,
+ RAS_CMD__GET_CPER_SNAPSHOT,
+ snapshot_req, sizeof(struct ras_cmd_cper_snapshot_req),
+ snapshot_rsp, sizeof(struct ras_cmd_cper_snapshot_rsp));
+ if (r || !snapshot_rsp->total_cper_num)
+ return r;
+
+ start_cper_id = snapshot_rsp->start_cper_id;
+ total_cper_num = snapshot_rsp->total_cper_num;
+
+ record_req->buf_ptr = (uint64_t)(uintptr_t)buf;
+ record_req->buf_size = size;
+ record_req->cper_start_id = start_cper_id + *offset;
+ record_req->cper_num = total_cper_num;
+ r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev, RAS_CMD__GET_CPER_RECORD,
+ record_req, sizeof(struct ras_cmd_cper_record_req),
+ record_rsp, sizeof(struct ras_cmd_cper_record_rsp));
+ if (r)
+ return r;
+
+ r = *offset ? record_rsp->real_data_size : record_rsp->real_data_size + ring_header_size;
+ (*offset) += record_rsp->real_cper_num;
+
+ return r;
+}
+
/* Layout of file is 12 bytes consisting of
* - rptr
* - wptr
@@ -506,6 +572,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
loff_t i;
int r;
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER && amdgpu_uniras_enabled(ring->adev))
+ return amdgpu_ras_cper_debugfs_read(f, buf, size, pos);
+
if (*pos & 3 || size & 3)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4b46e3c26ff3..7a27c6c4bb44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -83,6 +83,7 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_MES,
AMDGPU_RING_TYPE_UMSCH_MM,
AMDGPU_RING_TYPE_CPER,
+ AMDGPU_RING_TYPE_MAX,
};
enum amdgpu_ib_pool_type {
@@ -147,16 +148,14 @@ struct amdgpu_fence {
u64 wptr;
/* fence context for resets */
u64 context;
- uint32_t seq;
};
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
-void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af);
-void amdgpu_fence_save_wptr(struct dma_fence *fence);
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
@@ -166,8 +165,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
- struct amdgpu_fence *af, unsigned int flags);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+ unsigned int flags);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
uint32_t timeout);
bool amdgpu_fence_process(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
index 41ebe690eeff..3739be1b71e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
@@ -159,7 +159,6 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
dev_err(adev->dev, "Invalid input: %s\n", str);
}
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 9d568c16beb1..2b931e855abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -188,7 +188,6 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
struct amdgpu_job *job;
void *cpu_addr;
uint64_t flags;
- unsigned int i;
int r;
BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
@@ -255,16 +254,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr);
} else {
- dma_addr_t dma_address;
-
- dma_address = mm_cur->start;
- dma_address += adev->vm_manager.vram_base_offset;
+ u64 pa = mm_cur->start + adev->vm_manager.vram_base_offset;
- for (i = 0; i < num_pages; ++i) {
- amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, &dma_address,
- flags, cpu_addr);
- dma_address += PAGE_SIZE;
- }
+ amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr);
}
dma_fence_put(amdgpu_job_submit(job));
@@ -286,12 +278,13 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
* move and different for a BO to BO copy.
*
*/
-int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
- const struct amdgpu_copy_mem *src,
- const struct amdgpu_copy_mem *dst,
- uint64_t size, bool tmz,
- struct dma_resv *resv,
- struct dma_fence **f)
+__attribute__((nonnull))
+static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+ const struct amdgpu_copy_mem *src,
+ const struct amdgpu_copy_mem *dst,
+ uint64_t size, bool tmz,
+ struct dma_resv *resv,
+ struct dma_fence **f)
{
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_res_cursor src_mm, dst_mm;
@@ -365,9 +358,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
}
error:
mutex_unlock(&adev->mman.gtt_window_lock);
- if (f)
- *f = dma_fence_get(fence);
- dma_fence_put(fence);
+ *f = fence;
return r;
}
@@ -706,10 +697,11 @@ struct amdgpu_ttm_tt {
* memory and start HMM tracking CPU page table update
*
* Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
- * once afterwards to stop HMM tracking
+ * once afterwards to stop HMM tracking. Its the caller responsibility to ensure
+ * that range is a valid memory and it is freed too.
*/
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
- struct hmm_range **range)
+ struct amdgpu_hmm_range *range)
{
struct ttm_tt *ttm = bo->tbo.ttm;
struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
@@ -719,9 +711,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
bool readonly;
int r = 0;
- /* Make sure get_user_pages_done() can cleanup gracefully */
- *range = NULL;
-
mm = bo->notifier.mm;
if (unlikely(!mm)) {
DRM_DEBUG_DRIVER("BO is not registered?\n");
@@ -756,38 +745,6 @@ out_unlock:
return r;
}
-/* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations
- */
-void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
- struct hmm_range *range)
-{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
-
- if (gtt && gtt->userptr && range)
- amdgpu_hmm_range_get_pages_done(range);
-}
-
-/*
- * amdgpu_ttm_tt_get_user_pages_done - stop HMM track the CPU page table change
- * Check if the pages backing this ttm range have been invalidated
- *
- * Returns: true if pages are still valid
- */
-bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
- struct hmm_range *range)
-{
- struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
-
- if (!gtt || !gtt->userptr || !range)
- return false;
-
- DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
- gtt->userptr, ttm->num_pages);
-
- WARN_ONCE(!range->hmm_pfns, "No user pages to check\n");
-
- return !amdgpu_hmm_range_get_pages_done(range);
-}
#endif
/*
@@ -797,12 +754,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
* that backs user memory and will ultimately be mapped into the device
* address space.
*/
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range)
{
unsigned long i;
for (i = 0; i < ttm->num_pages; ++i)
- ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_pfns[i]) : NULL;
+ ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_range.hmm_pfns[i]) : NULL;
}
/*
@@ -1529,6 +1486,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
if (r)
goto out;
+ mutex_lock(&adev->mman.gtt_window_lock);
amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
src_mm.start;
@@ -1543,6 +1501,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
WARN_ON(job->ibs[0].length_dw > num_dw);
fence = amdgpu_job_submit(job);
+ mutex_unlock(&adev->mman.gtt_window_lock);
if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
r = -ETIMEDOUT;
@@ -1804,18 +1763,14 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
}
- if (!adev->gmc.is_app_apu) {
- ret = amdgpu_bo_create_kernel_at(
- adev, adev->gmc.real_vram_size - reserve_size,
- reserve_size, &adev->mman.fw_reserved_memory, NULL);
- if (ret) {
- dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret);
- amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
- NULL, NULL);
- return ret;
- }
- } else {
- DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n");
+ ret = amdgpu_bo_create_kernel_at(
+ adev, adev->gmc.real_vram_size - reserve_size, reserve_size,
+ &adev->mman.fw_reserved_memory, NULL);
+ if (ret) {
+ dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
+ return ret;
}
return 0;
@@ -1837,7 +1792,7 @@ static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
adev->gmc.mem_partitions[i].numa.node,
- false, false);
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
}
return 0;
}
@@ -1930,8 +1885,11 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
adev_to_drm(adev)->anon_inode->i_mapping,
adev_to_drm(adev)->vma_offset_manager,
- adev->need_swiotlb,
- dma_addressing_limited(adev->dev));
+ (adev->need_swiotlb ?
+ TTM_ALLOCATION_POOL_USE_DMA_ALLOC : 0) |
+ (dma_addressing_limited(adev->dev) ?
+ TTM_ALLOCATION_POOL_USE_DMA32 : 0) |
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
if (r) {
dev_err(adev->dev,
"failed initializing buffer object driver(%d).\n", r);
@@ -1980,19 +1938,19 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
/*
- *The reserved vram for driver must be pinned to the specified
- *place on the VRAM, so reserve it early.
+ * The reserved VRAM for the driver must be pinned to a specific
+ * location in VRAM, so reserve it early.
*/
r = amdgpu_ttm_drv_reserve_vram_init(adev);
if (r)
return r;
/*
- * only NAVI10 and onwards ASIC support for IP discovery.
- * If IP discovery enabled, a block of memory should be
- * reserved for IP discovey.
+ * only NAVI10 and later ASICs support IP discovery.
+ * If IP discovery is enabled, a block of memory should be
+ * reserved for it.
*/
- if (adev->mman.discovery_bin) {
+ if (adev->discovery.reserve_tmr) {
r = amdgpu_ttm_reserve_tmr(adev);
if (r)
return r;
@@ -2229,8 +2187,10 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
} else {
drm_sched_entity_destroy(&adev->mman.high_pr);
drm_sched_entity_destroy(&adev->mman.low_pr);
- dma_fence_put(man->move);
- man->move = NULL;
+ /* Drop all the old fences since re-creating the scheduler entities
+ * will allocate new contexts.
+ */
+ ttm_resource_manager_cleanup(man);
}
/* this just adjusts TTM size idea, which sets lpfn to the correct value */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 0be2728aa872..577ee04ce0bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -28,6 +28,7 @@
#include <drm/gpu_scheduler.h>
#include <drm/ttm/ttm_placement.h>
#include "amdgpu_vram_mgr.h"
+#include "amdgpu_hmm.h"
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
@@ -82,9 +83,6 @@ struct amdgpu_mman {
uint64_t stolen_reserved_offset;
uint64_t stolen_reserved_size;
- /* discovery */
- uint8_t *discovery_bin;
- uint32_t discovery_tmr_size;
/* fw reserved memory */
struct amdgpu_bo *fw_reserved_memory;
struct amdgpu_bo *fw_reserved_memory_extend;
@@ -170,12 +168,6 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
bool vm_needs_flush, uint32_t copy_flags);
-int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
- const struct amdgpu_copy_mem *src,
- const struct amdgpu_copy_mem *dst,
- uint64_t size, bool tmz,
- struct dma_resv *resv,
- struct dma_fence **f);
int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
struct dma_resv *resv,
struct dma_fence **fence);
@@ -192,29 +184,16 @@ uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
- struct hmm_range **range);
-void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
- struct hmm_range *range);
-bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
- struct hmm_range *range);
+ struct amdgpu_hmm_range *range);
#else
static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
- struct hmm_range **range)
+ struct amdgpu_hmm_range *range)
{
return -EPERM;
}
-static inline void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
- struct hmm_range *range)
-{
-}
-static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
- struct hmm_range *range)
-{
- return false;
-}
#endif
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range);
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range);
int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
uint64_t *user_addr);
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 2e039fb778ea..3f0b0e9af4f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -24,6 +24,7 @@
#include <linux/sort.h>
#include "amdgpu.h"
#include "umc_v6_7.h"
+#include "amdgpu_ras_mgr.h"
#define MAX_UMC_POISON_POLLING_TIME_SYNC 20 //ms
#define MAX_UMC_HASH_STRING_SIZE 256
@@ -96,67 +97,96 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control = &con->eeprom_control;
unsigned int error_query_mode;
int ret = 0;
unsigned long err_count;
amdgpu_ras_get_error_query_mode(adev, &error_query_mode);
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+ /* still call query_ras_error_address to clear error status
+ * even NOMEM error is encountered
+ */
+ if (!err_data->err_addr)
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error address record!\n");
+ else
+ err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
+
mutex_lock(&con->page_retirement_lock);
- ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
- if (ret == -EOPNOTSUPP &&
- error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
- if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
- adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
- adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status);
-
- if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
- adev->umc.ras->ras_block.hw_ops->query_ras_error_address &&
- adev->umc.max_ras_err_cnt_per_query) {
- err_data->err_addr =
- kcalloc(adev->umc.max_ras_err_cnt_per_query,
- sizeof(struct eeprom_table_record), GFP_KERNEL);
-
- /* still call query_ras_error_address to clear error status
- * even NOMEM error is encountered
- */
- if(!err_data->err_addr)
- dev_warn(adev->dev, "Failed to alloc memory for "
- "umc error address record!\n");
- else
- err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
-
- /* umc query_ras_error_address is also responsible for clearing
- * error status
- */
- adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status);
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
+ if (ret == -EOPNOTSUPP &&
+ error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev,
+ ras_error_status);
+
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address &&
+ adev->umc.max_ras_err_cnt_per_query) {
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+ /* still call query_ras_error_address to clear error status
+ * even NOMEM error is encountered
+ */
+ if (!err_data->err_addr)
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error address record!\n");
+ else
+ err_data->err_addr_len =
+ adev->umc.max_ras_err_cnt_per_query;
+
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev,
+ ras_error_status);
+ }
+ } else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY ||
+ (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) {
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_count)
+ adev->umc.ras->ecc_info_query_ras_error_count(adev,
+ ras_error_status);
+
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_address &&
+ adev->umc.max_ras_err_cnt_per_query) {
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+ /* still call query_ras_error_address to clear error status
+ * even NOMEM error is encountered
+ */
+ if (!err_data->err_addr)
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error address record!\n");
+ else
+ err_data->err_addr_len =
+ adev->umc.max_ras_err_cnt_per_query;
+
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ adev->umc.ras->ecc_info_query_ras_error_address(adev,
+ ras_error_status);
+ }
}
- } else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY ||
- (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) {
- if (adev->umc.ras &&
- adev->umc.ras->ecc_info_query_ras_error_count)
- adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status);
-
- if (adev->umc.ras &&
- adev->umc.ras->ecc_info_query_ras_error_address &&
- adev->umc.max_ras_err_cnt_per_query) {
- err_data->err_addr =
- kcalloc(adev->umc.max_ras_err_cnt_per_query,
- sizeof(struct eeprom_table_record), GFP_KERNEL);
-
- /* still call query_ras_error_address to clear error status
- * even NOMEM error is encountered
- */
- if(!err_data->err_addr)
- dev_warn(adev->dev, "Failed to alloc memory for "
- "umc error address record!\n");
- else
- err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
-
- /* umc query_ras_error_address is also responsible for clearing
- * error status
- */
- adev->umc.ras->ecc_info_query_ras_error_address(adev, ras_error_status);
+ } else {
+ if (!amdgpu_ras_eeprom_update_record_num(control)) {
+ err_data->err_addr_cnt = err_data->de_count =
+ control->ras_num_recs - control->ras_num_recs_old;
+ amdgpu_ras_eeprom_read_idx(control, err_data->err_addr,
+ control->ras_num_recs_old, err_data->de_count);
}
}
@@ -166,7 +196,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
if ((amdgpu_bad_page_threshold != 0) &&
err_data->err_addr_cnt) {
amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
- err_data->err_addr_cnt, false);
+ err_data->err_addr_cnt, amdgpu_ras_smu_eeprom_supported(adev));
amdgpu_ras_save_bad_pages(adev, &err_count);
amdgpu_dpm_send_hbm_bad_pages_num(adev,
@@ -244,6 +274,15 @@ int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
}
amdgpu_ras_error_data_fini(&err_data);
+ } else if (amdgpu_uniras_enabled(adev)) {
+ struct ras_ih_info ih_info = {0};
+
+ ih_info.block = block;
+ ih_info.pasid = pasid;
+ ih_info.reset = reset;
+ ih_info.pasid_fn = pasid_fn;
+ ih_info.data = data;
+ amdgpu_ras_mgr_handle_consumer_interrupt(adev, &ih_info);
} else {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
int ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index ec203f9e5ffa..28dff750c47e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -113,6 +113,8 @@ struct amdgpu_umc_ras {
uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev,
uint64_t mca_addr, uint64_t retired_page);
void (*get_retire_flip_bits)(struct amdgpu_device *adev);
+ void (*mca_ipid_parse)(struct amdgpu_device *adev, uint64_t ipid,
+ uint32_t *did, uint32_t *ch, uint32_t *umc_inst, uint32_t *sid);
};
struct amdgpu_umc_funcs {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 1add21160d21..9a969175900e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -25,10 +25,13 @@
#include <drm/drm_auth.h>
#include <drm/drm_exec.h>
#include <linux/pm_runtime.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
+#include "amdgpu_reset.h"
#include "amdgpu_vm.h"
#include "amdgpu_userq.h"
+#include "amdgpu_hmm.h"
#include "amdgpu_userq_fence.h"
u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
@@ -44,10 +47,130 @@ u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
return userq_ip_mask;
}
-int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
- u64 expected_size)
+static bool amdgpu_userq_is_reset_type_supported(struct amdgpu_device *adev,
+ enum amdgpu_ring_type ring_type, int reset_type)
+{
+
+ if (ring_type < 0 || ring_type >= AMDGPU_RING_TYPE_MAX)
+ return false;
+
+ switch (ring_type) {
+ case AMDGPU_RING_TYPE_GFX:
+ if (adev->gfx.gfx_supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ if (adev->gfx.compute_supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ if (adev->sdma.supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_VCN_DEC:
+ case AMDGPU_RING_TYPE_VCN_ENC:
+ if (adev->vcn.supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ if (adev->jpeg.supported_reset & reset_type)
+ return true;
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
+static void amdgpu_userq_gpu_reset(struct amdgpu_device *adev)
+{
+ if (amdgpu_device_should_recover_gpu(adev)) {
+ amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->userq_reset_work);
+ /* Wait for the reset job to complete */
+ flush_work(&adev->userq_reset_work);
+ }
+}
+
+static int
+amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const int queue_types[] = {
+ AMDGPU_RING_TYPE_COMPUTE,
+ AMDGPU_RING_TYPE_GFX,
+ AMDGPU_RING_TYPE_SDMA
+ };
+ const int num_queue_types = ARRAY_SIZE(queue_types);
+ bool gpu_reset = false;
+ int r = 0;
+ int i;
+
+ /* Warning if current process mutex is not held */
+ WARN_ON(!mutex_is_locked(&uq_mgr->userq_mutex));
+
+ if (unlikely(adev->debug_disable_gpu_ring_reset)) {
+ dev_err(adev->dev, "userq reset disabled by debug mask\n");
+ return 0;
+ }
+
+ /*
+ * If GPU recovery feature is disabled system-wide,
+ * skip all reset detection logic
+ */
+ if (!amdgpu_gpu_recovery)
+ return 0;
+
+ /*
+ * Iterate through all queue types to detect and reset problematic queues
+ * Process each queue type in the defined order
+ */
+ for (i = 0; i < num_queue_types; i++) {
+ int ring_type = queue_types[i];
+ const struct amdgpu_userq_funcs *funcs = adev->userq_funcs[ring_type];
+
+ if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, AMDGPU_RESET_TYPE_PER_QUEUE))
+ continue;
+
+ if (atomic_read(&uq_mgr->userq_count[ring_type]) > 0 &&
+ funcs && funcs->detect_and_reset) {
+ r = funcs->detect_and_reset(adev, ring_type);
+ if (r) {
+ gpu_reset = true;
+ break;
+ }
+ }
+ }
+
+ if (gpu_reset)
+ amdgpu_userq_gpu_reset(adev);
+
+ return r;
+}
+
+static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue,
+ struct amdgpu_bo_va_mapping *va_map, u64 addr)
+{
+ struct amdgpu_userq_va_cursor *va_cursor;
+ struct userq_va_list;
+
+ va_cursor = kzalloc(sizeof(*va_cursor), GFP_KERNEL);
+ if (!va_cursor)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&va_cursor->list);
+ va_cursor->gpu_addr = addr;
+ atomic_set(&va_map->bo_va->userq_va_mapped, 1);
+ list_add(&va_cursor->list, &queue->userq_va_list);
+
+ return 0;
+}
+
+int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue,
+ u64 addr, u64 expected_size)
{
struct amdgpu_bo_va_mapping *va_map;
+ struct amdgpu_vm *vm = queue->vm;
u64 user_addr;
u64 size;
int r = 0;
@@ -67,6 +190,7 @@ int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
/* Only validate the userq whether resident in the VM mapping range */
if (user_addr >= va_map->start &&
va_map->last - user_addr + 1 >= size) {
+ amdgpu_userq_buffer_va_list_add(queue, va_map, user_addr);
amdgpu_bo_unreserve(vm->root.bo);
return 0;
}
@@ -77,6 +201,76 @@ out_err:
return r;
}
+static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ bool r;
+
+ if (amdgpu_bo_reserve(vm->root.bo, false))
+ return false;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
+ if (!IS_ERR_OR_NULL(mapping) && atomic_read(&mapping->bo_va->userq_va_mapped))
+ r = true;
+ else
+ r = false;
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ return r;
+}
+
+static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_userq_va_cursor *va_cursor, *tmp;
+ int r = 0;
+
+ list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) {
+ r += amdgpu_userq_buffer_va_mapped(queue->vm, va_cursor->gpu_addr);
+ dev_dbg(queue->userq_mgr->adev->dev,
+ "validate the userq mapping:%p va:%llx r:%d\n",
+ queue, va_cursor->gpu_addr, r);
+ }
+
+ if (r != 0)
+ return true;
+
+ return false;
+}
+
+static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_userq_va_cursor *va_cursor)
+{
+ atomic_set(&mapping->bo_va->userq_va_mapped, 0);
+ list_del(&va_cursor->list);
+ kfree(va_cursor);
+}
+
+static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_userq_va_cursor *va_cursor, *tmp;
+ struct amdgpu_bo_va_mapping *mapping;
+ int r;
+
+ r = amdgpu_bo_reserve(queue->vm->root.bo, false);
+ if (r)
+ return r;
+
+ list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) {
+ mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, va_cursor->gpu_addr);
+ if (!mapping) {
+ r = -EINVAL;
+ goto err;
+ }
+ dev_dbg(adev->dev, "delete the userq:%p va:%llx\n",
+ queue, va_cursor->gpu_addr);
+ amdgpu_userq_buffer_va_list_del(mapping, va_cursor);
+ }
+err:
+ amdgpu_bo_unreserve(queue->vm->root.bo);
+ return r;
+}
+
static int
amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue)
@@ -84,17 +278,22 @@ amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *userq_funcs =
adev->userq_funcs[queue->queue_type];
+ bool found_hung_queue = false;
int r = 0;
if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
r = userq_funcs->preempt(uq_mgr, queue);
if (r) {
queue->state = AMDGPU_USERQ_STATE_HUNG;
+ found_hung_queue = true;
} else {
queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
}
}
+ if (found_hung_queue)
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
+
return r;
}
@@ -126,16 +325,23 @@ amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *userq_funcs =
adev->userq_funcs[queue->queue_type];
+ bool found_hung_queue = false;
int r = 0;
if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
(queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
r = userq_funcs->unmap(uq_mgr, queue);
- if (r)
+ if (r) {
queue->state = AMDGPU_USERQ_STATE_HUNG;
- else
+ found_hung_queue = true;
+ } else {
queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
+ }
}
+
+ if (found_hung_queue)
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
+
return r;
}
@@ -152,26 +358,33 @@ amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
r = userq_funcs->map(uq_mgr, queue);
if (r) {
queue->state = AMDGPU_USERQ_STATE_HUNG;
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
} else {
queue->state = AMDGPU_USERQ_STATE_MAPPED;
}
}
+
return r;
}
-static void
+static int
amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue)
{
struct dma_fence *f = queue->last_fence;
- int ret;
+ int ret = 0;
if (f && !dma_fence_is_signaled(f)) {
- ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
- if (ret <= 0)
+ ret = dma_fence_wait_timeout(f, true, MAX_SCHEDULE_TIMEOUT);
+ if (ret <= 0) {
drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
f->context, f->seqno);
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ return -ETIME;
+ }
}
+
+ return ret;
}
static void
@@ -182,16 +395,27 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
+ /* Wait for mode-1 reset to complete */
+ down_read(&adev->reset_domain->sem);
+
+ /* Drop the userq reference. */
+ amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
uq_funcs->mqd_destroy(uq_mgr, queue);
amdgpu_userq_fence_driver_free(queue);
- idr_remove(&uq_mgr->userq_idr, queue_id);
+ /* Use interrupt-safe locking since IRQ handlers may access these XArrays */
+ xa_erase_irq(&uq_mgr->userq_mgr_xa, (unsigned long)queue_id);
+ xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
+ queue->userq_mgr = NULL;
+ list_del(&queue->userq_va_list);
kfree(queue);
+
+ up_read(&adev->reset_domain->sem);
}
static struct amdgpu_usermode_queue *
amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
{
- return idr_find(&uq_mgr->userq_idr, qid);
+ return xa_load(&uq_mgr->userq_mgr_xa, qid);
}
void
@@ -319,17 +543,6 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
case AMDGPU_HW_IP_DMA:
db_size = sizeof(u64);
break;
-
- case AMDGPU_HW_IP_VCN_ENC:
- db_size = sizeof(u32);
- db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1;
- break;
-
- case AMDGPU_HW_IP_VPE:
- db_size = sizeof(u32);
- db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1;
- break;
-
default:
drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n",
db_info->queue_type);
@@ -378,10 +591,11 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
amdgpu_bo_unreserve(queue->db_obj.obj);
}
amdgpu_bo_unref(&queue->db_obj.obj);
-
+ atomic_dec(&uq_mgr->userq_count[queue->queue_type]);
#if defined(CONFIG_DEBUG_FS)
debugfs_remove_recursive(queue->debugfs_queue);
#endif
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
r = amdgpu_userq_unmap_helper(uq_mgr, queue);
/*TODO: It requires a reset for userq hw unmap error*/
if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) {
@@ -391,7 +605,6 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
mutex_unlock(&uq_mgr->userq_mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -463,8 +676,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
struct amdgpu_db_info db_info;
char *queue_name;
bool skip_map_queue;
+ u32 qid;
uint64_t index;
- int qid, r = 0;
+ int r = 0;
int priority =
(args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
@@ -487,7 +701,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
*
* This will also make sure we have a valid eviction fence ready to be used.
*/
- mutex_lock(&adev->userq_mutex);
amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
uq_funcs = adev->userq_funcs[args->in.ip_type];
@@ -505,14 +718,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
goto unlock;
}
- /* Validate the userq virtual address.*/
- if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, args->in.queue_size) ||
- amdgpu_userq_input_va_validate(&fpriv->vm, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
- amdgpu_userq_input_va_validate(&fpriv->vm, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
- r = -EINVAL;
- kfree(queue);
- goto unlock;
- }
+ INIT_LIST_HEAD(&queue->userq_va_list);
queue->doorbell_handle = args->in.doorbell_handle;
queue->queue_type = args->in.ip_type;
queue->vm = &fpriv->vm;
@@ -523,6 +729,15 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
db_info.db_obj = &queue->db_obj;
db_info.doorbell_offset = args->in.doorbell_offset;
+ /* Validate the userq virtual address.*/
+ if (amdgpu_userq_input_va_validate(queue, args->in.queue_va, args->in.queue_size) ||
+ amdgpu_userq_input_va_validate(queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
+ amdgpu_userq_input_va_validate(queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
+ r = -EINVAL;
+ kfree(queue);
+ goto unlock;
+ }
+
/* Convert relative doorbell offset into absolute doorbell index */
index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
if (index == (uint64_t)-EINVAL) {
@@ -548,16 +763,27 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
goto unlock;
}
+ /* Wait for mode-1 reset to complete */
+ down_read(&adev->reset_domain->sem);
+ r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
+ if (r) {
+ kfree(queue);
+ up_read(&adev->reset_domain->sem);
+ goto unlock;
+ }
- qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
- if (qid < 0) {
+ r = xa_alloc(&uq_mgr->userq_mgr_xa, &qid, queue, XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
+ if (r) {
drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
amdgpu_userq_fence_driver_free(queue);
uq_funcs->mqd_destroy(uq_mgr, queue);
kfree(queue);
r = -ENOMEM;
+ up_read(&adev->reset_domain->sem);
goto unlock;
}
+ up_read(&adev->reset_domain->sem);
+ queue->userq_mgr = uq_mgr;
/* don't map the queue if scheduling is halted */
if (adev->userq_halt_for_enforce_isolation &&
@@ -570,7 +796,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
r = amdgpu_userq_map_helper(uq_mgr, queue);
if (r) {
drm_file_err(uq_mgr->file, "Failed to map Queue\n");
- idr_remove(&uq_mgr->userq_idr, qid);
+ xa_erase(&uq_mgr->userq_mgr_xa, qid);
amdgpu_userq_fence_driver_free(queue);
uq_funcs->mqd_destroy(uq_mgr, queue);
kfree(queue);
@@ -592,10 +818,10 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
kfree(queue_name);
args->out.queue_id = qid;
+ atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
unlock:
mutex_unlock(&uq_mgr->userq_mutex);
- mutex_unlock(&adev->userq_mutex);
return r;
}
@@ -693,11 +919,19 @@ static int
amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
{
struct amdgpu_usermode_queue *queue;
- int queue_id;
+ unsigned long queue_id;
int ret = 0, r;
/* Resume all the queues for this process */
- idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
+ xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
+
+ if (!amdgpu_userq_buffer_vas_mapped(queue)) {
+ drm_file_err(uq_mgr->file,
+ "trying restore queue without va mapping\n");
+ queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
+ continue;
+ }
+
r = amdgpu_userq_restore_helper(uq_mgr, queue);
if (r)
ret = r;
@@ -760,12 +994,21 @@ static int
amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
{
struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ bool invalidated = false, new_addition = false;
+ struct ttm_operation_ctx ctx = { true, false };
struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_hmm_range *range;
struct amdgpu_vm *vm = &fpriv->vm;
+ unsigned long key, tmp_key;
struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
struct drm_exec exec;
+ struct xarray xa;
int ret;
+ xa_init(&xa);
+
+retry_lock:
drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
ret = amdgpu_vm_lock_pd(vm, &exec, 1);
@@ -792,10 +1035,74 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
goto unlock_all;
}
+ if (invalidated) {
+ xa_for_each(&xa, tmp_key, range) {
+ bo = range->bo;
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto unlock_all;
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto unlock_all;
+ }
+ invalidated = false;
+ }
+
ret = amdgpu_vm_handle_moved(adev, vm, NULL);
if (ret)
goto unlock_all;
+ key = 0;
+ /* Validate User Ptr BOs */
+ list_for_each_entry(bo_va, &vm->done, base.vm_status) {
+ bo = bo_va->base.bo;
+ if (!bo)
+ continue;
+
+ if (!amdgpu_ttm_tt_is_userptr(bo->tbo.ttm))
+ continue;
+
+ range = xa_load(&xa, key);
+ if (range && range->bo != bo) {
+ xa_erase(&xa, key);
+ amdgpu_hmm_range_free(range);
+ range = NULL;
+ }
+
+ if (!range) {
+ range = amdgpu_hmm_range_alloc(bo);
+ if (!range) {
+ ret = -ENOMEM;
+ goto unlock_all;
+ }
+
+ xa_store(&xa, key, range, GFP_KERNEL);
+ new_addition = true;
+ }
+ key++;
+ }
+
+ if (new_addition) {
+ drm_exec_fini(&exec);
+ xa_for_each(&xa, tmp_key, range) {
+ if (!range)
+ continue;
+ bo = range->bo;
+ ret = amdgpu_ttm_tt_get_user_pages(bo, range);
+ if (ret)
+ goto unlock_all;
+ }
+
+ invalidated = true;
+ new_addition = false;
+ goto retry_lock;
+ }
+
ret = amdgpu_vm_update_pdes(adev, vm, false);
if (ret)
goto unlock_all;
@@ -815,6 +1122,13 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
unlock_all:
drm_exec_fini(&exec);
+ xa_for_each(&xa, tmp_key, range) {
+ if (!range)
+ continue;
+ bo = range->bo;
+ amdgpu_hmm_range_free(range);
+ }
+ xa_destroy(&xa);
return ret;
}
@@ -848,11 +1162,12 @@ static int
amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
{
struct amdgpu_usermode_queue *queue;
- int queue_id;
+ unsigned long queue_id;
int ret = 0, r;
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
/* Try to unmap all the queues in this process ctx */
- idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
+ xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
r = amdgpu_userq_preempt_helper(uq_mgr, queue);
if (r)
ret = r;
@@ -863,13 +1178,31 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
return ret;
}
+void amdgpu_userq_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ userq_reset_work);
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_USERQ;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
static int
amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
{
struct amdgpu_usermode_queue *queue;
- int queue_id, ret;
+ unsigned long queue_id;
+ int ret;
- idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
+ xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
struct dma_fence *f = queue->last_fence;
if (!f || dma_fence_is_signaled(f))
@@ -889,22 +1222,19 @@ void
amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_eviction_fence *ev_fence)
{
- int ret;
struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ int ret;
/* Wait for any pending userqueue fence work to finish */
ret = amdgpu_userq_wait_for_signal(uq_mgr);
- if (ret) {
- drm_file_err(uq_mgr->file, "Not evicting userqueue, timeout waiting for work\n");
- return;
- }
+ if (ret)
+ dev_err(adev->dev, "Not evicting userqueue, timeout waiting for work\n");
ret = amdgpu_userq_evict_all(uq_mgr);
- if (ret) {
- drm_file_err(uq_mgr->file, "Failed to evict userqueue\n");
- return;
- }
+ if (ret)
+ dev_err(adev->dev, "Failed to evict userqueue\n");
/* Signal current eviction fence */
amdgpu_eviction_fence_signal(evf_mgr, ev_fence);
@@ -922,44 +1252,31 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f
struct amdgpu_device *adev)
{
mutex_init(&userq_mgr->userq_mutex);
- idr_init_base(&userq_mgr->userq_idr, 1);
+ xa_init_flags(&userq_mgr->userq_mgr_xa, XA_FLAGS_ALLOC);
userq_mgr->adev = adev;
userq_mgr->file = file_priv;
- mutex_lock(&adev->userq_mutex);
- list_add(&userq_mgr->list, &adev->userq_mgr_list);
- mutex_unlock(&adev->userq_mutex);
-
INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
return 0;
}
void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
{
- struct amdgpu_device *adev = userq_mgr->adev;
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
- uint32_t queue_id;
+ unsigned long queue_id;
cancel_delayed_work_sync(&userq_mgr->resume_work);
- mutex_lock(&adev->userq_mutex);
mutex_lock(&userq_mgr->userq_mutex);
- idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) {
+ amdgpu_userq_detect_and_reset_queues(userq_mgr);
+ xa_for_each(&userq_mgr->userq_mgr_xa, queue_id, queue) {
amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
amdgpu_userq_unmap_helper(userq_mgr, queue);
amdgpu_userq_cleanup(userq_mgr, queue, queue_id);
}
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
- if (uqm == userq_mgr) {
- list_del(&uqm->list);
- break;
- }
- }
- idr_destroy(&userq_mgr->userq_idr);
+ xa_destroy(&userq_mgr->userq_mgr_xa);
mutex_unlock(&userq_mgr->userq_mutex);
- mutex_unlock(&adev->userq_mutex);
mutex_destroy(&userq_mgr->userq_mutex);
}
@@ -967,57 +1284,51 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
{
u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
- int queue_id;
- int ret = 0, r;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int r;
if (!ip_mask)
return 0;
- mutex_lock(&adev->userq_mutex);
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
cancel_delayed_work_sync(&uqm->resume_work);
- mutex_lock(&uqm->userq_mutex);
- idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- if (adev->in_s0ix)
- r = amdgpu_userq_preempt_helper(uqm, queue);
- else
- r = amdgpu_userq_unmap_helper(uqm, queue);
- if (r)
- ret = r;
- }
- mutex_unlock(&uqm->userq_mutex);
+ guard(mutex)(&uqm->userq_mutex);
+ amdgpu_userq_detect_and_reset_queues(uqm);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_preempt_helper(uqm, queue);
+ else
+ r = amdgpu_userq_unmap_helper(uqm, queue);
+ if (r)
+ return r;
}
- mutex_unlock(&adev->userq_mutex);
- return ret;
+ return 0;
}
int amdgpu_userq_resume(struct amdgpu_device *adev)
{
u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
- int queue_id;
- int ret = 0, r;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int r;
if (!ip_mask)
return 0;
- mutex_lock(&adev->userq_mutex);
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
- mutex_lock(&uqm->userq_mutex);
- idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- if (adev->in_s0ix)
- r = amdgpu_userq_restore_helper(uqm, queue);
- else
- r = amdgpu_userq_map_helper(uqm, queue);
- if (r)
- ret = r;
- }
- mutex_unlock(&uqm->userq_mutex);
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ guard(mutex)(&uqm->userq_mutex);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_restore_helper(uqm, queue);
+ else
+ r = amdgpu_userq_map_helper(uqm, queue);
+ if (r)
+ return r;
}
- mutex_unlock(&adev->userq_mutex);
- return ret;
+
+ return 0;
}
int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
@@ -1025,33 +1336,32 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
{
u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
- int queue_id;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
int ret = 0, r;
/* only need to stop gfx/compute */
if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
return 0;
- mutex_lock(&adev->userq_mutex);
if (adev->userq_halt_for_enforce_isolation)
dev_warn(adev->dev, "userq scheduling already stopped!\n");
adev->userq_halt_for_enforce_isolation = true;
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
cancel_delayed_work_sync(&uqm->resume_work);
mutex_lock(&uqm->userq_mutex);
- idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
- (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
- (queue->xcp_id == idx)) {
- r = amdgpu_userq_preempt_helper(uqm, queue);
- if (r)
- ret = r;
- }
+ if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+ (queue->xcp_id == idx)) {
+ amdgpu_userq_detect_and_reset_queues(uqm);
+ r = amdgpu_userq_preempt_helper(uqm, queue);
+ if (r)
+ ret = r;
}
mutex_unlock(&uqm->userq_mutex);
}
- mutex_unlock(&adev->userq_mutex);
+
return ret;
}
@@ -1060,21 +1370,20 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
{
u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
- int queue_id;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
int ret = 0, r;
/* only need to stop gfx/compute */
if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
return 0;
- mutex_lock(&adev->userq_mutex);
if (!adev->userq_halt_for_enforce_isolation)
dev_warn(adev->dev, "userq scheduling already started!\n");
adev->userq_halt_for_enforce_isolation = false;
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
mutex_lock(&uqm->userq_mutex);
- idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
(queue->xcp_id == idx)) {
@@ -1082,9 +1391,92 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
if (r)
ret = r;
}
- }
mutex_unlock(&uqm->userq_mutex);
}
- mutex_unlock(&adev->userq_mutex);
+
return ret;
}
+
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t saddr)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_bo_va *bo_va = mapping->bo_va;
+ struct dma_resv *resv = bo_va->base.bo->tbo.base.resv;
+ int ret = 0;
+
+ if (!ip_mask)
+ return 0;
+
+ dev_warn_once(adev->dev, "now unmapping a vital queue va:%llx\n", saddr);
+ /**
+ * The userq VA mapping reservation should include the eviction fence,
+ * if the eviction fence can't signal successfully during unmapping,
+ * then driver will warn to flag this improper unmap of the userq VA.
+ * Note: The eviction fence may be attached to different BOs, and this
+ * unmap is only for one kind of userq VAs, so at this point suppose
+ * the eviction fence is always unsignaled.
+ */
+ if (!dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) {
+ ret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, true,
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret <= 0)
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+void amdgpu_userq_pre_reset(struct amdgpu_device *adev)
+{
+ const struct amdgpu_userq_funcs *userq_funcs;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ cancel_delayed_work_sync(&uqm->resume_work);
+ if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
+ amdgpu_userq_wait_for_last_fence(uqm, queue);
+ userq_funcs = adev->userq_funcs[queue->queue_type];
+ userq_funcs->unmap(uqm, queue);
+ /* just mark all queues as hung at this point.
+ * if unmap succeeds, we could map again
+ * in amdgpu_userq_post_reset() if vram is not lost
+ */
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ amdgpu_userq_fence_driver_force_completion(queue);
+ }
+ }
+}
+
+int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost)
+{
+ /* if any queue state is AMDGPU_USERQ_STATE_UNMAPPED
+ * at this point, we should be able to map it again
+ * and continue if vram is not lost.
+ */
+ struct amdgpu_userq_mgr *uqm;
+ struct amdgpu_usermode_queue *queue;
+ const struct amdgpu_userq_funcs *userq_funcs;
+ unsigned long queue_id;
+ int r = 0;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ if (queue->state == AMDGPU_USERQ_STATE_HUNG && !vram_lost) {
+ userq_funcs = adev->userq_funcs[queue->queue_type];
+ /* Re-map queue */
+ r = userq_funcs->map(uqm, queue);
+ if (r) {
+ dev_err(adev->dev, "Failed to remap queue %ld\n", queue_id);
+ continue;
+ }
+ queue->state = AMDGPU_USERQ_STATE_MAPPED;
+ }
+ }
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index c027dd916672..c37444427a14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -37,6 +37,7 @@ enum amdgpu_userq_state {
AMDGPU_USERQ_STATE_MAPPED,
AMDGPU_USERQ_STATE_PREEMPTED,
AMDGPU_USERQ_STATE_HUNG,
+ AMDGPU_USERQ_STATE_INVALID_VA,
};
struct amdgpu_mqd_prop;
@@ -47,6 +48,11 @@ struct amdgpu_userq_obj {
struct amdgpu_bo *obj;
};
+struct amdgpu_userq_va_cursor {
+ u64 gpu_addr;
+ struct list_head list;
+};
+
struct amdgpu_usermode_queue {
int queue_type;
enum amdgpu_userq_state state;
@@ -66,6 +72,8 @@ struct amdgpu_usermode_queue {
u32 xcp_id;
int priority;
struct dentry *debugfs_queue;
+
+ struct list_head userq_va_list;
};
struct amdgpu_userq_funcs {
@@ -88,12 +96,17 @@ struct amdgpu_userq_funcs {
/* Usermode queues for gfx */
struct amdgpu_userq_mgr {
- struct idr userq_idr;
+ /**
+ * @userq_mgr_xa: Per-process user queue map (queue ID → queue)
+ * Key: queue_id (unique ID within the process's userq manager)
+ * Value: struct amdgpu_usermode_queue
+ */
+ struct xarray userq_mgr_xa;
struct mutex userq_mutex;
struct amdgpu_device *adev;
struct delayed_work resume_work;
- struct list_head list;
struct drm_file *file;
+ atomic_t userq_count[AMDGPU_RING_TYPE_MAX];
};
struct amdgpu_db_info {
@@ -136,7 +149,13 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
u32 idx);
int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
u32 idx);
-
-int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
- u64 expected_size);
+void amdgpu_userq_reset_work(struct work_struct *work);
+void amdgpu_userq_pre_reset(struct amdgpu_device *adev);
+int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost);
+
+int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue,
+ u64 addr, u64 expected_size);
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t saddr);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 4d0096d0baa9..eba9fb359047 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -387,6 +387,7 @@ static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
amdgpu_bo_unreserve(queue->vm->root.bo);
r = amdgpu_bo_reserve(bo, true);
if (r) {
+ amdgpu_bo_unref(&bo);
DRM_ERROR("Failed to reserve userqueue wptr bo");
return r;
}
@@ -538,7 +539,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
}
/* Retrieve the user queue */
- queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
+ queue = xa_load(&userq_mgr->userq_mgr_xa, args->queue_id);
if (!queue) {
r = -ENOENT;
goto put_gobj_write;
@@ -900,7 +901,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
*/
num_fences = dma_fence_dedup_array(fences, num_fences);
- waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id);
+ waitq = xa_load(&userq_mgr->userq_mgr_xa, wait_info->waitq_id);
if (!waitq) {
r = -EINVAL;
goto free_fences;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index ce318f5de047..a7d8f1ce6ac2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -41,6 +41,9 @@
#define VCE_IDLE_TIMEOUT msecs_to_jiffies(1000)
/* Firmware Names */
+#ifdef CONFIG_DRM_AMDGPU_SI
+#define FIRMWARE_VCE_V1_0 "amdgpu/vce_1_0_0.bin"
+#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE "amdgpu/bonaire_vce.bin"
#define FIRMWARE_KABINI "amdgpu/kabini_vce.bin"
@@ -61,6 +64,9 @@
#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin"
#define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin"
+#ifdef CONFIG_DRM_AMDGPU_SI
+MODULE_FIRMWARE(FIRMWARE_VCE_V1_0);
+#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
@@ -88,82 +94,93 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence);
/**
- * amdgpu_vce_sw_init - allocate memory, load vce firmware
+ * amdgpu_vce_firmware_name() - determine the firmware file name for VCE
*
* @adev: amdgpu_device pointer
- * @size: size for the new BO
*
- * First step to get VCE online, allocate memory and load the firmware
+ * Each chip that has VCE IP may need a different firmware.
+ * This function returns the name of the VCE firmware file
+ * appropriate for the current chip.
*/
-int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
+static const char *amdgpu_vce_firmware_name(struct amdgpu_device *adev)
{
- const char *fw_name;
- const struct common_firmware_header *hdr;
- unsigned int ucode_version, version_major, version_minor, binary_id;
- int i, r;
-
switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_PITCAIRN:
+ case CHIP_TAHITI:
+ case CHIP_VERDE:
+ return FIRMWARE_VCE_V1_0;
+#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_BONAIRE:
- fw_name = FIRMWARE_BONAIRE;
- break;
+ return FIRMWARE_BONAIRE;
case CHIP_KAVERI:
- fw_name = FIRMWARE_KAVERI;
- break;
+ return FIRMWARE_KAVERI;
case CHIP_KABINI:
- fw_name = FIRMWARE_KABINI;
- break;
+ return FIRMWARE_KABINI;
case CHIP_HAWAII:
- fw_name = FIRMWARE_HAWAII;
- break;
+ return FIRMWARE_HAWAII;
case CHIP_MULLINS:
- fw_name = FIRMWARE_MULLINS;
- break;
+ return FIRMWARE_MULLINS;
#endif
case CHIP_TONGA:
- fw_name = FIRMWARE_TONGA;
- break;
+ return FIRMWARE_TONGA;
case CHIP_CARRIZO:
- fw_name = FIRMWARE_CARRIZO;
- break;
+ return FIRMWARE_CARRIZO;
case CHIP_FIJI:
- fw_name = FIRMWARE_FIJI;
- break;
+ return FIRMWARE_FIJI;
case CHIP_STONEY:
- fw_name = FIRMWARE_STONEY;
- break;
+ return FIRMWARE_STONEY;
case CHIP_POLARIS10:
- fw_name = FIRMWARE_POLARIS10;
- break;
+ return FIRMWARE_POLARIS10;
case CHIP_POLARIS11:
- fw_name = FIRMWARE_POLARIS11;
- break;
+ return FIRMWARE_POLARIS11;
case CHIP_POLARIS12:
- fw_name = FIRMWARE_POLARIS12;
- break;
+ return FIRMWARE_POLARIS12;
case CHIP_VEGAM:
- fw_name = FIRMWARE_VEGAM;
- break;
+ return FIRMWARE_VEGAM;
case CHIP_VEGA10:
- fw_name = FIRMWARE_VEGA10;
- break;
+ return FIRMWARE_VEGA10;
case CHIP_VEGA12:
- fw_name = FIRMWARE_VEGA12;
- break;
+ return FIRMWARE_VEGA12;
case CHIP_VEGA20:
- fw_name = FIRMWARE_VEGA20;
- break;
+ return FIRMWARE_VEGA20;
default:
- return -EINVAL;
+ return NULL;
}
+}
+
+/**
+ * amdgpu_vce_early_init() - try to load VCE firmware
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tries to load the VCE firmware.
+ *
+ * When not found, returns ENOENT so that the driver can
+ * still load and initialize the rest of the IP blocks.
+ * The GPU can function just fine without VCE, they will just
+ * not support video encoding.
+ */
+int amdgpu_vce_early_init(struct amdgpu_device *adev)
+{
+ const char *fw_name = amdgpu_vce_firmware_name(adev);
+ const struct common_firmware_header *hdr;
+ unsigned int ucode_version, version_major, version_minor, binary_id;
+ int r;
+
+ if (!fw_name)
+ return -ENOENT;
r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name);
if (r) {
- dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
- fw_name);
+ dev_err(adev->dev,
+ "amdgpu_vce: Firmware \"%s\" not found or failed to validate (%d)\n",
+ fw_name, r);
+
amdgpu_ucode_release(&adev->vce.fw);
- return r;
+ return -ENOENT;
}
hdr = (const struct common_firmware_header *)adev->vce.fw->data;
@@ -172,11 +189,35 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
version_major = (ucode_version >> 20) & 0xfff;
version_minor = (ucode_version >> 8) & 0xfff;
binary_id = ucode_version & 0xff;
- DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n",
+ dev_info(adev->dev, "Found VCE firmware Version: %d.%d Binary ID: %d\n",
version_major, version_minor, binary_id);
adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
(binary_id << 8));
+ return 0;
+}
+
+/**
+ * amdgpu_vce_sw_init() - allocate memory for VCE BO
+ *
+ * @adev: amdgpu_device pointer
+ * @size: size for the new BO
+ *
+ * First step to get VCE online: allocate memory for VCE BO.
+ * The VCE firmware binary is copied into the VCE BO later,
+ * in amdgpu_vce_resume. The VCE executes its code from the
+ * VCE BO and also uses the space in this BO for its stack and data.
+ *
+ * Ideally this BO should be placed in VRAM for optimal performance,
+ * although technically it also runs from system RAM (albeit slowly).
+ */
+int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
+{
+ int i, r;
+
+ if (!adev->vce.fw)
+ return -ENOENT;
+
r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
@@ -285,40 +326,23 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev)
*/
int amdgpu_vce_resume(struct amdgpu_device *adev)
{
- void *cpu_addr;
const struct common_firmware_header *hdr;
unsigned int offset;
- int r, idx;
+ int idx;
if (adev->vce.vcpu_bo == NULL)
return -EINVAL;
- r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
- if (r) {
- dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
- return r;
- }
-
- r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
- if (r) {
- amdgpu_bo_unreserve(adev->vce.vcpu_bo);
- dev_err(adev->dev, "(%d) VCE map failed\n", r);
- return r;
- }
-
hdr = (const struct common_firmware_header *)adev->vce.fw->data;
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
+ memset_io(adev->vce.cpu_addr, 0, amdgpu_bo_size(adev->vce.vcpu_bo));
+ memcpy_toio(adev->vce.cpu_addr, adev->vce.fw->data + offset,
adev->vce.fw->size - offset);
drm_dev_exit(idx);
}
- amdgpu_bo_kunmap(adev->vce.vcpu_bo);
-
- amdgpu_bo_unreserve(adev->vce.vcpu_bo);
-
return 0;
}
@@ -427,6 +451,24 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
}
/**
+ * amdgpu_vce_required_gart_pages() - gets number of GART pages required by VCE
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns how many GART pages we need before GTT for the VCE IP block.
+ * For VCE1, see vce_v1_0_ensure_vcpu_bo_32bit_addr for details.
+ * For VCE2+, this is not needed so return zero.
+ */
+u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev)
+{
+ /* VCE IP block not added yet, so can't use amdgpu_ip_version */
+ if (adev->family == AMDGPU_FAMILY_SI)
+ return 512;
+
+ return 0;
+}
+
+/**
* amdgpu_vce_get_create_msg - generate a VCE create msg
*
* @ring: ring we should submit the msg to
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index 6e53f872d084..1c3464ce5037 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -51,14 +51,17 @@ struct amdgpu_vce {
struct drm_sched_entity entity;
uint32_t srbm_soft_reset;
unsigned num_rings;
+ uint32_t keyselect;
};
+int amdgpu_vce_early_init(struct amdgpu_device *adev);
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size);
int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
int amdgpu_vce_suspend(struct amdgpu_device *adev);
int amdgpu_vce_resume(struct amdgpu_device *adev);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
+u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
struct amdgpu_ib *ib);
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index dc8a17bcc3c8..82624b44e661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -100,7 +100,8 @@
#define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg) \
({ \
- uint32_t internal_reg_offset, addr; \
+ /* To avoid a -Wunused-but-set-variable warning. */ \
+ uint32_t internal_reg_offset __maybe_unused, addr; \
bool video_range, video1_range, aon_range, aon1_range; \
\
addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
@@ -161,7 +162,8 @@
#define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \
({ \
- uint32_t internal_reg_offset, addr; \
+ /* To avoid a -Wunused-but-set-variable warning. */ \
+ uint32_t internal_reg_offset __maybe_unused, addr; \
bool video_range, video1_range, aon_range, aon1_range; \
\
addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index f96beb96c75c..47a6ce4fdc74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -44,6 +44,18 @@
vf2pf_info->ucode_info[ucode].version = ver; \
} while (0)
+#define mmRCC_CONFIG_MEMSIZE 0xde3
+
+const char *amdgpu_virt_dynamic_crit_table_name[] = {
+ "IP DISCOVERY",
+ "VBIOS IMG",
+ "RAS TELEMETRY",
+ "DATA EXCHANGE",
+ "BAD PAGE INFO",
+ "INIT HEADER",
+ "LAST",
+};
+
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
/* By now all MMIO pages except mailbox are blocked */
@@ -150,9 +162,10 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
virt->ops->req_init_data(adev);
if (adev->virt.req_init_data_ver > 0)
- DRM_INFO("host supports REQ_INIT_DATA handshake\n");
+ dev_info(adev->dev, "host supports REQ_INIT_DATA handshake of critical_region_version %d\n",
+ adev->virt.req_init_data_ver);
else
- DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
+ dev_warn(adev->dev, "host doesn't support REQ_INIT_DATA handshake\n");
}
/**
@@ -205,12 +218,12 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
&adev->virt.mm_table.gpu_addr,
(void *)&adev->virt.mm_table.cpu_addr);
if (r) {
- DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
+ dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r);
return r;
}
memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
- DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
+ dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n",
adev->virt.mm_table.gpu_addr,
adev->virt.mm_table.cpu_addr);
return 0;
@@ -390,7 +403,9 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_GPU_PAGE_SIZE,
&bo, NULL))
- DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
+ dev_dbg(adev->dev,
+ "RAS WARN: reserve vram for retired page %llx fail\n",
+ bp);
data->bps_bo[i] = bo;
}
data->last_reserved = i + 1;
@@ -658,10 +673,34 @@ out:
schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
}
+static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
+{
+ uint32_t dataexchange_offset =
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
+ uint32_t dataexchange_size =
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
+ uint64_t pos = 0;
+
+ dev_info(adev->dev,
+ "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+ dataexchange_offset, dataexchange_size);
+
+ if (!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) {
+ dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n");
+ return -EINVAL;
+ }
+
+ pos = (uint64_t)dataexchange_offset;
+ amdgpu_device_vram_access(adev, pos, pfvf_data,
+ dataexchange_size, false);
+
+ return 0;
+}
+
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
{
if (adev->virt.vf2pf_update_interval_ms != 0) {
- DRM_INFO("clean up the vf2pf work item\n");
+ dev_info(adev->dev, "clean up the vf2pf work item\n");
cancel_delayed_work_sync(&adev->virt.vf2pf_work);
adev->virt.vf2pf_update_interval_ms = 0;
}
@@ -669,13 +708,15 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
{
+ uint32_t *pfvf_data = NULL;
+
adev->virt.fw_reserve.p_pf2vf = NULL;
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
adev->virt.vf2pf_update_retry_cnt = 0;
if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
- DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
+ dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!");
} else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
/* go through this logic in ip_init and reset to init workqueue*/
amdgpu_virt_exchange_data(adev);
@@ -684,11 +725,34 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
} else if (adev->bios != NULL) {
/* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
- adev->virt.fw_reserve.p_pf2vf =
- (struct amd_sriov_msg_pf2vf_info_header *)
- (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+ if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+ pfvf_data =
+ kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
+ GFP_KERNEL);
+ if (!pfvf_data) {
+ dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n");
+ return;
+ }
- amdgpu_virt_read_pf2vf_data(adev);
+ if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
+ goto free_pfvf_data;
+
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
+
+ amdgpu_virt_read_pf2vf_data(adev);
+
+free_pfvf_data:
+ kfree(pfvf_data);
+ pfvf_data = NULL;
+ adev->virt.fw_reserve.p_pf2vf = NULL;
+ } else {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+
+ amdgpu_virt_read_pf2vf_data(adev);
+ }
}
}
@@ -701,23 +765,38 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
if (adev->mman.fw_vram_usage_va) {
- adev->virt.fw_reserve.p_pf2vf =
- (struct amd_sriov_msg_pf2vf_info_header *)
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
- adev->virt.fw_reserve.p_vf2pf =
- (struct amd_sriov_msg_vf2pf_info_header *)
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
- adev->virt.fw_reserve.ras_telemetry =
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
+ if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
+ (AMD_SRIOV_MSG_SIZE_KB << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
+ } else {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+ }
} else if (adev->mman.drv_vram_usage_va) {
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
- (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
adev->virt.fw_reserve.p_vf2pf =
(struct amd_sriov_msg_vf2pf_info_header *)
- (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
adev->virt.fw_reserve.ras_telemetry =
- (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
}
amdgpu_virt_read_pf2vf_data(adev);
@@ -816,7 +895,7 @@ static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
break;
default: /* other chip doesn't support SRIOV */
is_sriov = false;
- DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
+ dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type);
break;
}
}
@@ -838,10 +917,220 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
RATELIMIT_MSG_ON_RELEASE);
mutex_init(&adev->virt.ras.ras_telemetry_mutex);
+ mutex_init(&adev->virt.access_req_mutex);
adev->virt.ras.cper_rptr = 0;
}
+static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t *buf_end)
+{
+ uint32_t sum = 0;
+
+ if (buf_start >= buf_end)
+ return 0;
+
+ for (; buf_start < buf_end; buf_start++)
+ sum += buf_start[0];
+
+ return 0xffffffff - sum;
+}
+
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
+{
+ struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
+ u64 init_hdr_offset = adev->virt.init_data_header.offset;
+ u64 init_hdr_size = (u64)adev->virt.init_data_header.size_kb << 10; /* KB → bytes */
+ u64 vram_size;
+ u64 end;
+ int r = 0;
+ uint8_t checksum = 0;
+
+ /* Skip below init if critical region version != v2 */
+ if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2)
+ return 0;
+
+ if (init_hdr_offset < 0) {
+ dev_err(adev->dev, "Invalid init header offset\n");
+ return -EINVAL;
+ }
+
+ vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+ if (!vram_size || vram_size == U32_MAX)
+ return -EINVAL;
+ vram_size <<= 20;
+
+ if (check_add_overflow(init_hdr_offset, init_hdr_size, &end) || end > vram_size) {
+ dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n");
+ return -EINVAL;
+ }
+
+ /* Allocate for init_data_hdr */
+ init_data_hdr = kzalloc(sizeof(struct amd_sriov_msg_init_data_header), GFP_KERNEL);
+ if (!init_data_hdr)
+ return -ENOMEM;
+
+ amdgpu_device_vram_access(adev, (uint64_t)init_hdr_offset, (uint32_t *)init_data_hdr,
+ sizeof(struct amd_sriov_msg_init_data_header), false);
+
+ /* Table validation */
+ if (strncmp(init_data_hdr->signature,
+ AMDGPU_SRIOV_CRIT_DATA_SIGNATURE,
+ AMDGPU_SRIOV_CRIT_DATA_SIG_LEN) != 0) {
+ dev_err(adev->dev, "Invalid init data signature: %.4s\n",
+ init_data_hdr->signature);
+ r = -EINVAL;
+ goto out;
+ }
+
+ checksum = amdgpu_virt_crit_region_calc_checksum(
+ (uint8_t *)&init_data_hdr->initdata_offset,
+ (uint8_t *)init_data_hdr +
+ sizeof(struct amd_sriov_msg_init_data_header));
+ if (checksum != init_data_hdr->checksum) {
+ dev_err(adev->dev, "Found unmatching checksum from calculation 0x%x and init_data 0x%x\n",
+ checksum, init_data_hdr->checksum);
+ r = -EINVAL;
+ goto out;
+ }
+
+ memset(&adev->virt.crit_regn, 0, sizeof(adev->virt.crit_regn));
+ memset(adev->virt.crit_regn_tbl, 0, sizeof(adev->virt.crit_regn_tbl));
+
+ adev->virt.crit_regn.offset = init_data_hdr->initdata_offset;
+ adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb;
+
+ /* Validation and initialization for each table entry */
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_IPD_TABLE_ID)) {
+ if (!init_data_hdr->ip_discovery_size_in_kb ||
+ init_data_hdr->ip_discovery_size_in_kb > DISCOVERY_TMR_SIZE) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_IPD_TABLE_ID],
+ init_data_hdr->ip_discovery_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset =
+ init_data_hdr->ip_discovery_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb =
+ init_data_hdr->ip_discovery_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID)) {
+ if (!init_data_hdr->vbios_img_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID],
+ init_data_hdr->vbios_img_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset =
+ init_data_hdr->vbios_img_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb =
+ init_data_hdr->vbios_img_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID)) {
+ if (!init_data_hdr->ras_tele_info_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID],
+ init_data_hdr->ras_tele_info_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset =
+ init_data_hdr->ras_tele_info_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb =
+ init_data_hdr->ras_tele_info_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID)) {
+ if (!init_data_hdr->dataexchange_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID],
+ init_data_hdr->dataexchange_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset =
+ init_data_hdr->dataexchange_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb =
+ init_data_hdr->dataexchange_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID)) {
+ if (!init_data_hdr->bad_page_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID],
+ init_data_hdr->bad_page_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset =
+ init_data_hdr->bad_page_info_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
+ init_data_hdr->bad_page_size_in_kb;
+ }
+
+ /* Validation for critical region info */
+ if (adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb > DISCOVERY_TMR_SIZE) {
+ dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n",
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* reserved memory starts from crit region base offset with the size of 5MB */
+ adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
+ adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
+ dev_info(adev->dev,
+ "critical region v%d requested to reserve memory start at %08llx with %llu KB.\n",
+ init_data_hdr->version,
+ adev->mman.fw_vram_usage_start_offset,
+ adev->mman.fw_vram_usage_size >> 10);
+
+ adev->virt.is_dynamic_crit_regn_enabled = true;
+
+out:
+ kfree(init_data_hdr);
+ init_data_hdr = NULL;
+
+ return r;
+}
+
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+ int data_id, uint8_t *binary, u32 *size)
+{
+ uint32_t data_offset = 0;
+ uint32_t data_size = 0;
+ enum amd_sriov_msg_table_id_enum data_table_id = data_id;
+
+ if (data_table_id >= AMD_SRIOV_MSG_MAX_TABLE_ID)
+ return -EINVAL;
+
+ data_offset = adev->virt.crit_regn_tbl[data_table_id].offset;
+ data_size = adev->virt.crit_regn_tbl[data_table_id].size_kb << 10;
+
+ /* Validate on input params */
+ if (!binary || !size || *size < (uint64_t)data_size)
+ return -EINVAL;
+
+ /* Proceed to copy the dynamic content */
+ amdgpu_device_vram_access(adev,
+ (uint64_t)data_offset, (uint32_t *)binary, data_size, false);
+ *size = (uint64_t)data_size;
+
+ dev_dbg(adev->dev,
+ "Got %s info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+ amdgpu_virt_dynamic_crit_table_name[data_id], data_offset, data_size);
+
+ return 0;
+}
+
void amdgpu_virt_init(struct amdgpu_device *adev)
{
bool is_sriov = false;
@@ -1289,7 +1578,7 @@ amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block bloc
case AMDGPU_RAS_BLOCK__MPIO:
return RAS_TELEMETRY_GPU_BLOCK_MPIO;
default:
- DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
+ dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n",
block);
return RAS_TELEMETRY_GPU_BLOCK_COUNT;
}
@@ -1304,7 +1593,7 @@ static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
checksum = host_telemetry->header.checksum;
used_size = host_telemetry->header.used_size;
- if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
return 0;
tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
@@ -1383,7 +1672,7 @@ amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
checksum = host_telemetry->header.checksum;
used_size = host_telemetry->header.used_size;
- if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
return -EINVAL;
cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
@@ -1515,7 +1804,7 @@ static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
checksum = host_telemetry->header.checksum;
used_size = host_telemetry->header.used_size;
- if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
return 0;
tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index d1172c8e58c4..01d5bca2dee1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -54,6 +54,12 @@
#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2
+/* Signature used to validate the SR-IOV dynamic critical region init data header ("INDA") */
+#define AMDGPU_SRIOV_CRIT_DATA_SIGNATURE "INDA"
+#define AMDGPU_SRIOV_CRIT_DATA_SIG_LEN 4
+
+#define IS_SRIOV_CRIT_REGN_ENTRY_VALID(hdr, id) ((hdr)->valid_tables & (1 << (id)))
+
enum amdgpu_sriov_vf_mode {
SRIOV_VF_MODE_BARE_METAL = 0,
SRIOV_VF_MODE_ONE_VF,
@@ -144,6 +150,7 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_RAS_CAPS = (1 << 9),
AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10),
AMDGIM_FEATURE_RAS_CPER = (1 << 11),
+ AMDGIM_FEATURE_XGMI_TA_EXT_PEER_LINK = (1 << 12),
};
enum AMDGIM_REG_ACCESS_FLAG {
@@ -262,6 +269,11 @@ struct amdgpu_virt_ras {
DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST);
+struct amdgpu_virt_region {
+ uint32_t offset;
+ uint32_t size_kb;
+};
+
/* GPU virtualization */
struct amdgpu_virt {
uint32_t caps;
@@ -289,6 +301,12 @@ struct amdgpu_virt {
bool ras_init_done;
uint32_t reg_access;
+ /* dynamic(v2) critical regions */
+ struct amdgpu_virt_region init_data_header;
+ struct amdgpu_virt_region crit_regn;
+ struct amdgpu_virt_region crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID];
+ bool is_dynamic_crit_regn_enabled;
+
/* vf2pf message */
struct delayed_work vf2pf_work;
uint32_t vf2pf_update_interval_ms;
@@ -307,6 +325,8 @@ struct amdgpu_virt {
/* Spinlock to protect access to the RLCG register interface */
spinlock_t rlcg_reg_lock;
+ struct mutex access_req_mutex;
+
union amd_sriov_ras_caps ras_en_caps;
union amd_sriov_ras_caps ras_telemetry_en_caps;
struct amdgpu_virt_ras ras;
@@ -378,6 +398,9 @@ struct amdgpu_video_codec_info;
#define amdgpu_sriov_ras_cper_en(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER)
+#define amdgpu_sriov_xgmi_ta_ext_peer_link_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_XGMI_TA_EXT_PEER_LINK)
+
static inline bool is_virtual_machine(void)
{
#if defined(CONFIG_X86)
@@ -424,6 +447,10 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
void amdgpu_virt_init(struct amdgpu_device *adev);
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+ int data_id, uint8_t *binary, u32 *size);
+
bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 676e24fb8864..a67285118c37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -484,15 +484,19 @@ int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
spin_lock(&vm->status_lock);
while (!list_is_head(prev->next, &vm->done)) {
bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status);
- spin_unlock(&vm->status_lock);
bo = bo_va->base.bo;
if (bo) {
+ amdgpu_bo_ref(bo);
+ spin_unlock(&vm->status_lock);
+
ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1);
+ amdgpu_bo_unref(&bo);
if (unlikely(ret))
return ret;
+
+ spin_lock(&vm->status_lock);
}
- spin_lock(&vm->status_lock);
prev = prev->next;
}
spin_unlock(&vm->status_lock);
@@ -779,7 +783,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
bool cleaner_shader_needed = false;
bool pasid_mapping_needed = false;
struct dma_fence *fence = NULL;
- struct amdgpu_fence *af;
unsigned int patch;
int r;
@@ -842,12 +845,12 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
}
if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
- r = amdgpu_fence_emit(ring, &fence, NULL, 0);
+ r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
if (r)
return r;
- /* this is part of the job's context */
- af = container_of(fence, struct amdgpu_fence, base);
- af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
+ fence = &job->hw_vm_fence->base;
+ /* get a ref for the job */
+ dma_fence_get(fence);
}
if (vm_flush_needed) {
@@ -1952,6 +1955,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_vm *vm = bo_va->base.vm;
bool valid = true;
+ int r;
saddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -1972,6 +1976,17 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
return -ENOENT;
}
+ /* It's unlikely to happen that the mapping userq hasn't been idled
+ * during user requests GEM unmap IOCTL except for forcing the unmap
+ * from user space.
+ */
+ if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) {
+ r = amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr);
+ if (unlikely(r == -EBUSY))
+ dev_warn_once(adev->dev,
+ "Attempt to unmap an active userq buffer\n");
+ }
+
list_del(&mapping->list);
amdgpu_vm_it_remove(mapping, &vm->va);
mapping->bo_va = NULL;
@@ -2828,8 +2843,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
*/
void amdgpu_vm_manager_init(struct amdgpu_device *adev)
{
- unsigned i;
-
/* Concurrent flushes are only possible starting with Vega10 and
* are broken on Navi10 and Navi14.
*/
@@ -2838,11 +2851,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
adev->asic_type == CHIP_NAVI14);
amdgpu_vmid_mgr_init(adev);
- adev->vm_manager.fence_context =
- dma_fence_context_alloc(AMDGPU_MAX_RINGS);
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
- adev->vm_manager.seqno[i] = 0;
-
spin_lock_init(&adev->vm_manager.prt_lock);
atomic_set(&adev->vm_manager.num_prt_users, 0);
@@ -2908,8 +2916,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
/* We only have requirement to reserve vmid from gfxhub */
- amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
- break;
+ return amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
case AMDGPU_VM_OP_UNRESERVE_VMID:
amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index cf0ec94e8a07..15d757c016cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -453,10 +453,6 @@ struct amdgpu_vm_manager {
unsigned int first_kfd_vmid;
bool concurrent_flush;
- /* Handling of VM fences */
- u64 fence_context;
- unsigned seqno[AMDGPU_MAX_RINGS];
-
uint64_t max_pfn;
uint32_t num_level;
uint32_t block_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 1ede308a7c67..aad530c46a9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -298,6 +298,9 @@ int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num)
{
int link_map_6_4_x[8] = { 0, 3, 1, 2, 7, 6, 4, 5 };
+ if (adev->gmc.xgmi.num_physical_nodes <= 1)
+ return -EINVAL;
+
switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
case IP_VERSION(6, 4, 0):
case IP_VERSION(6, 4, 1):
@@ -333,6 +336,10 @@ static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link
}
i = global_link_num / n;
+
+ if (!(adev->aid_mask & BIT(i)))
+ return U32_MAX;
+
addr += adev->asic_funcs->encode_ext_smn_addressing(i);
return RREG32_PCIE_EXT(addr);
@@ -342,6 +349,9 @@ int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num)
{
u32 xgmi_state_reg_val;
+ if (adev->gmc.xgmi.num_physical_nodes <= 1)
+ return -EINVAL;
+
switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
case IP_VERSION(6, 4, 0):
case IP_VERSION(6, 4, 1):
@@ -958,28 +968,6 @@ static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_inf
return 0;
}
-static void amdgpu_xgmi_fill_topology_info(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev)
-{
- struct psp_xgmi_topology_info *top_info = &adev->psp.xgmi_context.top_info;
- struct psp_xgmi_topology_info *peer_info = &peer_adev->psp.xgmi_context.top_info;
-
- for (int i = 0; i < peer_info->num_nodes; i++) {
- if (peer_info->nodes[i].node_id == adev->gmc.xgmi.node_id) {
- for (int j = 0; j < top_info->num_nodes; j++) {
- if (top_info->nodes[j].node_id == peer_adev->gmc.xgmi.node_id) {
- peer_info->nodes[i].num_hops = top_info->nodes[j].num_hops;
- peer_info->nodes[i].is_sharing_enabled =
- top_info->nodes[j].is_sharing_enabled;
- peer_info->nodes[i].num_links =
- top_info->nodes[j].num_links;
- return;
- }
- }
- }
- }
-}
-
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
{
struct psp_xgmi_topology_info *top_info;
@@ -1065,11 +1053,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
/* To do: continue with some node failed or disable the whole hive*/
goto exit_unlock;
}
-
- /* fill the topology info for peers instead of getting from PSP */
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- amdgpu_xgmi_fill_topology_info(adev, tmp_adev);
- }
} else {
/* get latest topology info for each device from psp */
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 3a79ed7d8031..3cdb1e0eca37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -23,26 +23,84 @@
#ifndef AMDGV_SRIOV_MSG__H_
#define AMDGV_SRIOV_MSG__H_
-/* unit in kilobytes */
-#define AMD_SRIOV_MSG_VBIOS_OFFSET 0
-#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64
-#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
-#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4
-#define AMD_SRIOV_MSG_TMR_OFFSET_KB 2048
-#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB 2
-#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB 64
+#define AMD_SRIOV_MSG_SIZE_KB 1
+
/*
- * layout
+ * layout v1
* 0 64KB 65KB 66KB 68KB 132KB
* | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ...
* | 64KB | 1KB | 1KB | 2KB | 64KB | ...
*/
-#define AMD_SRIOV_MSG_SIZE_KB 1
-#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
-#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
-#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
-#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB)
+/*
+ * layout v2 (offsets are dynamically allocated and the offsets below are examples)
+ * 0 1KB 64KB 65KB 66KB 68KB 132KB
+ * | INITD_H | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ...
+ * | 1KB | 64KB | 1KB | 1KB | 2KB | 64KB | ...
+ *
+ * Note: PF2VF + VF2PF + Bad Page = DataExchange region (allocated contiguously)
+ */
+
+/* v1 layout sizes */
+#define AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 64
+#define AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 1
+#define AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 1
+#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1 2
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 64
+#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 \
+ (AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 + AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 + \
+ AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+
+/* v1 offsets */
+#define AMD_SRIOV_MSG_VBIOS_OFFSET_V1 0
+#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1 AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1
+#define AMD_SRIOV_MSG_TMR_OFFSET_KB 2048
+#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1
+#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB)
+#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB)
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+#define AMD_SRIOV_MSG_INIT_DATA_TOT_SIZE_KB_V1 \
+ (AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \
+ AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1)
+
+enum amd_sriov_crit_region_version {
+ GPU_CRIT_REGION_V1 = 1,
+ GPU_CRIT_REGION_V2 = 2,
+};
+
+/* v2 layout offset enum (in order of allocation) */
+enum amd_sriov_msg_table_id_enum {
+ AMD_SRIOV_MSG_IPD_TABLE_ID = 0,
+ AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
+ AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID,
+ AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID,
+ AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID,
+ AMD_SRIOV_MSG_INITD_H_TABLE_ID,
+ AMD_SRIOV_MSG_MAX_TABLE_ID,
+};
+
+struct amd_sriov_msg_init_data_header {
+ char signature[4]; /* "INDA" */
+ uint32_t version;
+ uint32_t checksum;
+ uint32_t initdata_offset; /* 0 */
+ uint32_t initdata_size_in_kb; /* 5MB */
+ uint32_t valid_tables;
+ uint32_t vbios_img_offset;
+ uint32_t vbios_img_size_in_kb;
+ uint32_t dataexchange_offset;
+ uint32_t dataexchange_size_in_kb;
+ uint32_t ras_tele_info_offset;
+ uint32_t ras_tele_info_size_in_kb;
+ uint32_t ip_discovery_offset;
+ uint32_t ip_discovery_size_in_kb;
+ uint32_t bad_page_info_offset;
+ uint32_t bad_page_size_in_kb;
+ uint32_t reserved[8];
+};
/*
* PF2VF history log:
@@ -102,7 +160,8 @@ union amd_sriov_msg_feature_flags {
uint32_t ras_caps : 1;
uint32_t ras_telemetry : 1;
uint32_t ras_cper : 1;
- uint32_t reserved : 20;
+ uint32_t xgmi_ta_ext_peer_link : 1;
+ uint32_t reserved : 19;
} flags;
uint32_t all;
};
@@ -140,8 +199,9 @@ union amd_sriov_ras_caps {
uint64_t block_jpeg : 1;
uint64_t block_ih : 1;
uint64_t block_mpio : 1;
+ uint64_t block_mmsch : 1;
uint64_t poison_propogation_mode : 1;
- uint64_t reserved : 44;
+ uint64_t reserved : 43;
} bits;
uint64_t all;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 41f4705bdbbd..876a3256dba4 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -156,6 +156,9 @@ static int cik_ih_irq_init(struct amdgpu_device *adev)
/* enable irqs */
cik_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -192,6 +195,9 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
/* When a ring buffer overflow happen start parsing interrupt
@@ -211,6 +217,8 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(mmIH_RB_CNTL, tmp);
}
+
+out:
return (wptr & ih->ptr_mask);
}
@@ -306,6 +314,10 @@ static int cik_ih_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
r = amdgpu_irq_init(adev);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index 2f891fb846d5..bc7a2e06ab5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -157,6 +157,9 @@ static int cz_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
cz_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -194,6 +197,9 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -297,6 +303,10 @@ static int cz_ih_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
r = amdgpu_irq_init(adev);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 8841d7213de4..d75b9940f248 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4956,7 +4956,8 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
- if (!amdgpu_sriov_vf(adev)) {
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
@@ -9951,6 +9952,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
};
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index f2be16e700c4..8a2ee2de390f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1821,13 +1821,15 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(11, 0, 3):
if ((adev->gfx.me_fw_version >= 2280) &&
(adev->gfx.mec_fw_version >= 2410) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
break;
default:
- if (!amdgpu_sriov_vf(adev)) {
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
@@ -2438,7 +2440,7 @@ static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
if (version_minor == 3)
gfx_v11_0_load_rlcp_rlcv_microcode(adev);
}
-
+
return 0;
}
@@ -3886,7 +3888,7 @@ static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
}
memcpy(fw, fw_data, fw_size);
-
+
amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
@@ -7318,6 +7320,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
.emit_wreg = gfx_v11_0_ring_emit_wreg,
.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
};
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 93fde0f9af87..d01d2712cf57 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -1548,7 +1548,8 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(12, 0, 1):
if ((adev->gfx.me_fw_version >= 2660) &&
(adev->gfx.mec_fw_version >= 2920) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
@@ -5595,6 +5596,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
.emit_wreg = gfx_v12_0_ring_emit_wreg,
.emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
};
static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8a81713d97aa..1c87375e1dd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6944,6 +6944,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_rreg = gfx_v8_0_ring_emit_rreg,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
+ .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
};
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index dd19a97436db..0148d7ff34d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2409,7 +2409,7 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset)
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
@@ -7586,6 +7586,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
};
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index c90cbe053ef3..cbb74ffc4792 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1149,14 +1149,16 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
if ((adev->gfx.mec_fw_version >= 155) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
}
break;
case IP_VERSION(9, 5, 0):
if ((adev->gfx.mec_fw_version >= 21) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
}
@@ -2152,7 +2154,8 @@ static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id)
return 0;
}
-static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id, bool restore)
+static void gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id,
+ bool restore)
{
struct amdgpu_device *adev = ring->adev;
struct v9_mqd *mqd = ring->mqd_ptr;
@@ -2186,8 +2189,6 @@ static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id, b
atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
amdgpu_ring_clear_ring(ring);
}
-
- return 0;
}
static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_id)
@@ -2220,7 +2221,7 @@ static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id)
static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id)
{
struct amdgpu_ring *ring;
- int i, r;
+ int i;
gfx_v9_4_3_xcc_cp_compute_enable(adev, true, xcc_id);
@@ -2228,9 +2229,7 @@ static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id)
ring = &adev->gfx.compute_ring[i + xcc_id *
adev->gfx.num_compute_rings];
- r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false);
- if (r)
- return r;
+ gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false);
}
return amdgpu_gfx_enable_kcq(adev, xcc_id);
@@ -3607,11 +3606,8 @@ pipe_reset:
return r;
}
- r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true);
- if (r) {
- dev_err(adev->dev, "fail to init kcq\n");
- return r;
- }
+ gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true);
+
spin_lock_irqsave(&kiq->ring_lock, flags);
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
if (r) {
@@ -4800,6 +4796,7 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = {
.emit_wreg = gfx_v9_4_3_ring_emit_wreg,
.emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush,
};
static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d7499be8c4bf..ce6e04242c52 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -103,8 +103,10 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
uint32_t vmhub_index = entry->client_id == SOC15_IH_CLIENTID_VMC ?
AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index];
- bool retry_fault = !!(entry->src_data[1] & 0x80);
- bool write_fault = !!(entry->src_data[1] & 0x20);
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
struct amdgpu_task_info *task_info;
uint32_t status = 0;
u64 addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 7bc389d9f5c4..ba59ee8e398a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -103,12 +103,41 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
uint32_t vmhub_index = entry->client_id == SOC21_IH_CLIENTID_VMC ?
AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index];
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
uint32_t status = 0;
u64 addr;
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+ if (retry_fault) {
+ /* Returning 1 here also prevents sending the IV to the KFD */
+
+ /* Process it only if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+ entry->timestamp))
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
+ entry->timestamp, write_fault))
+ return 1;
+ }
+
if (!amdgpu_sriov_vf(adev)) {
/*
* Issue a dummy read to wait for the status register to
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index f4a19357ccbc..7a9d6894e321 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -91,6 +91,10 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
struct amdgpu_vmhub *hub;
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
uint32_t status = 0;
u64 addr;
@@ -102,6 +106,31 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
else
hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ if (retry_fault) {
+ /* Returning 1 here also prevents sending the IV to the KFD */
+
+ /* Process it only if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+ entry->timestamp))
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
+ entry->timestamp, write_fault))
+ return 1;
+ }
+
if (!amdgpu_sriov_vf(adev)) {
/*
* Issue a dummy read to wait for the status register to
@@ -312,9 +341,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
return;
}
- mutex_lock(&adev->mman.gtt_window_lock);
gmc_v12_0_flush_vm_hub(adev, vmid, vmhub, 0);
- mutex_unlock(&adev->mman.gtt_window_lock);
return;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index f6ad7911f1e6..a8ec95f42926 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -213,7 +213,7 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, mc, base);
- amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW);
}
static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
@@ -610,23 +610,21 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev)
}
static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
- u32 status, u32 addr, u32 mc_client)
+ u32 status, u32 addr)
{
u32 mc_id;
u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
PROTECTIONS);
- char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
- (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_ID);
- dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
+ dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from %d\n",
protections, vmid, addr,
REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_RW) ?
- "write" : "read", block, mc_client, mc_id);
+ "write" : "read", mc_id);
}
static const u32 mc_cg_registers[] = {
@@ -1072,6 +1070,12 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
{
u32 addr, status;
+ /* Delegate to the soft IRQ handler ring */
+ if (adev->irq.ih_soft.enabled && entry->ih != &adev->irq.ih_soft) {
+ amdgpu_irq_delegate(adev, entry, 4);
+ return 1;
+ }
+
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
WREG32_P(mmVM_CONTEXT1_CNTL2, 1, ~1);
@@ -1079,6 +1083,10 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
if (!addr && !status)
return 0;
+ amdgpu_vm_update_fault_cache(adev, entry->pasid,
+ ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT,
+ status, AMDGPU_GFXHUB(0));
+
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v6_0_set_fault_enable_default(adev, false);
@@ -1089,7 +1097,7 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
status);
- gmc_v6_0_vm_decode_fault(adev, status, addr, 0);
+ gmc_v6_0_vm_decode_fault(adev, status, addr);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 0e5e54d0a9a5..fbd0bf147f50 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1261,6 +1261,12 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
{
u32 addr, status, mc_client, vmid;
+ /* Delegate to the soft IRQ handler ring */
+ if (adev->irq.ih_soft.enabled && entry->ih != &adev->irq.ih_soft) {
+ amdgpu_irq_delegate(adev, entry, 4);
+ return 1;
+ }
+
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index e1509480dfc2..6551b60f2584 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1439,6 +1439,12 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+ /* Delegate to the soft IRQ handler ring */
+ if (adev->irq.ih_soft.enabled && entry->ih != &adev->irq.ih_soft) {
+ amdgpu_irq_delegate(adev, entry, 4);
+ return 1;
+ }
+
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 0d1dd587db5f..8ad7519f7b58 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -544,8 +544,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- bool retry_fault = !!(entry->src_data[1] & 0x80);
- bool write_fault = !!(entry->src_data[1] & 0x20);
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
uint32_t status = 0, cid = 0, rw = 0, fed = 0;
struct amdgpu_task_info *task_info;
struct amdgpu_vmhub *hub;
@@ -1843,6 +1845,10 @@ static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) &&
+ adev->rev_id == 0x3)
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
if (!(adev->flags & AMD_IS_APU) && !amdgpu_sriov_vf(adev)) {
vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
adev->gmc.vram_vendor = vram_info & 0xF;
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index 1317ede131b6..01cadf898c00 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -157,6 +157,9 @@ static int iceland_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
iceland_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -194,6 +197,9 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -296,6 +302,10 @@ static int iceland_ih_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
r = amdgpu_irq_init(adev);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 1cd9eaeef38f..64cae89357b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -205,10 +205,11 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
int db_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev);
struct mes_detect_and_reset_queue_input input;
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
unsigned int hung_db_num = 0;
- int queue_id, r, i;
+ unsigned long queue_id;
u32 db_array[8];
+ bool found_hung_queue = false;
+ int r, i;
if (db_array_size > 8) {
dev_err(adev->dev, "DB array size (%d vs 8) too small\n",
@@ -227,22 +228,26 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
if (r) {
dev_err(adev->dev, "Failed to detect and reset queues, err (%d)\n", r);
} else if (hung_db_num) {
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
- idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- if (queue->queue_type == queue_type) {
- for (i = 0; i < hung_db_num; i++) {
- if (queue->doorbell_index == db_array[i]) {
- queue->state = AMDGPU_USERQ_STATE_HUNG;
- atomic_inc(&adev->gpu_reset_counter);
- amdgpu_userq_fence_driver_force_completion(queue);
- drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
- }
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ if (queue->queue_type == queue_type) {
+ for (i = 0; i < hung_db_num; i++) {
+ if (queue->doorbell_index == db_array[i]) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ found_hung_queue = true;
+ atomic_inc(&adev->gpu_reset_counter);
+ amdgpu_userq_fence_driver_force_completion(queue);
+ drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
}
}
}
}
}
+ if (found_hung_queue) {
+ /* Resume scheduling after hang recovery */
+ r = amdgpu_mes_resume(adev);
+ }
+
return r;
}
@@ -254,7 +259,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type];
struct drm_amdgpu_userq_in *mqd_user = args_in;
struct amdgpu_mqd_prop *userq_props;
- struct amdgpu_gfx_shadow_info shadow_info;
int r;
/* Structure to initialize MQD for userqueue using generic MQD init function */
@@ -280,8 +284,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
userq_props->doorbell_index = queue->doorbell_index;
userq_props->fence_address = queue->fence_drv->gpu_addr;
- if (adev->gfx.funcs->get_gfx_shadow_info)
- adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow_info, true);
if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
@@ -298,8 +300,9 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
goto free_mqd;
}
- if (amdgpu_userq_input_va_validate(queue->vm, compute_mqd->eop_va,
- max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE)))
+ r = amdgpu_userq_input_va_validate(queue, compute_mqd->eop_va,
+ 2048);
+ if (r)
goto free_mqd;
userq_props->eop_gpu_addr = compute_mqd->eop_va;
@@ -311,6 +314,14 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
kfree(compute_mqd);
} else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
+ struct amdgpu_gfx_shadow_info shadow_info;
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow_info, true);
+ } else {
+ r = -EINVAL;
+ goto free_mqd;
+ }
if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) {
DRM_ERROR("Invalid GFX MQD\n");
@@ -330,8 +341,13 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
userq_props->tmz_queue =
mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
- if (amdgpu_userq_input_va_validate(queue->vm, mqd_gfx_v11->shadow_va,
- shadow_info.shadow_size))
+ r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->shadow_va,
+ shadow_info.shadow_size);
+ if (r)
+ goto free_mqd;
+ r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->csa_va,
+ shadow_info.csa_size);
+ if (r)
goto free_mqd;
kfree(mqd_gfx_v11);
@@ -350,9 +366,9 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
r = -ENOMEM;
goto free_mqd;
}
-
- if (amdgpu_userq_input_va_validate(queue->vm, mqd_sdma_v11->csa_va,
- shadow_info.csa_size))
+ r = amdgpu_userq_input_va_validate(queue, mqd_sdma_v11->csa_va,
+ 32);
+ if (r)
goto free_mqd;
userq_props->csa_addr = mqd_sdma_v11->csa_va;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index da575bb1377f..3a52754b5cad 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -369,6 +369,7 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input)
{
union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+ uint32_t mes_rev = mes->sched_version & AMDGPU_MES_VERSION_MASK;
memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
@@ -379,6 +380,9 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
+ if (mes_rev >= 0x60)
+ mes_remove_queue_pkt.remove_queue_after_reset = input->remove_queue_after_reset;
+
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 7f3512d9de07..744e95d3984a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -361,6 +361,7 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input)
{
union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+ uint32_t mes_rev = mes->sched_version & AMDGPU_MES_VERSION_MASK;
memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
@@ -371,6 +372,9 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
+ if (mes_rev >= 0x5a)
+ mes_remove_queue_pkt.remove_queue_after_reset = input->remove_queue_after_reset;
+
return mes_v12_0_submit_pkt_and_poll_completion(mes,
AMDGPU_MES_SCHED_PIPE,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index e5282a5d05d9..e7cd07383d56 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -173,13 +173,17 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
enum idh_request req, u32 data1, u32 data2, u32 data3)
{
- int r, retry = 1;
+ struct amdgpu_virt *virt = &adev->virt;
+ int r = 0, retry = 1;
enum idh_event event = -1;
+ mutex_lock(&virt->access_req_mutex);
send_request:
- if (amdgpu_ras_is_rma(adev))
- return -ENODEV;
+ if (amdgpu_ras_is_rma(adev)) {
+ r = -ENODEV;
+ goto out;
+ }
xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3);
@@ -217,17 +221,25 @@ send_request:
if (req != IDH_REQ_GPU_INIT_DATA) {
dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r);
- return r;
+ goto out;
} else /* host doesn't support REQ_GPU_INIT_DATA handshake */
adev->virt.req_init_data_ver = 0;
} else {
if (req == IDH_REQ_GPU_INIT_DATA) {
- adev->virt.req_init_data_ver =
- RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1);
-
- /* assume V1 in case host doesn't set version number */
- if (adev->virt.req_init_data_ver < 1)
- adev->virt.req_init_data_ver = 1;
+ switch (RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1)) {
+ case GPU_CRIT_REGION_V2:
+ adev->virt.req_init_data_ver = GPU_CRIT_REGION_V2;
+ adev->virt.init_data_header.offset =
+ RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW2);
+ adev->virt.init_data_header.size_kb =
+ RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW3);
+ break;
+ default:
+ adev->virt.req_init_data_ver = GPU_CRIT_REGION_V1;
+ adev->virt.init_data_header.offset = -1;
+ adev->virt.init_data_header.size_kb = 0;
+ break;
+ }
}
}
@@ -238,7 +250,10 @@ send_request:
}
}
- return 0;
+out:
+ mutex_unlock(&virt->access_req_mutex);
+
+ return r;
}
static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
@@ -285,7 +300,8 @@ static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev,
static int xgpu_nv_request_init_data(struct amdgpu_device *adev)
{
- return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
+ return xgpu_nv_send_access_requests_with_param(adev, IDH_REQ_GPU_INIT_DATA,
+ 0, GPU_CRIT_REGION_V2, 0);
}
static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 1c22bc11c1f8..bdfd2917e3ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -41,19 +41,21 @@ static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev)
static u32 nbio_v7_9_get_rev_id(struct amdgpu_device *adev)
{
- u32 tmp;
-
- tmp = IP_VERSION_SUBREV(amdgpu_ip_version_full(adev, NBIO_HWIP, 0));
- /* If it is VF or subrevision holds a non-zero value, that should be used */
- if (tmp || amdgpu_sriov_vf(adev))
- return tmp;
+ u32 rev_id;
- /* If discovery subrev is not updated, use register version */
- tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
- tmp = REG_GET_FIELD(tmp, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0,
- STRAP_ATI_REV_ID_DEV0_F0);
+ /*
+ * fetch the sub-revision field from the IP-discovery table
+ * (returns zero if the table entry is not populated).
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ rev_id = IP_VERSION_SUBREV(amdgpu_ip_version_full(adev, NBIO_HWIP, 0));
+ } else {
+ rev_id = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+ rev_id = REG_GET_FIELD(rev_id, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0,
+ STRAP_ATI_REV_ID_DEV0_F0);
+ }
- return tmp;
+ return rev_id;
}
static void nbio_v7_9_mc_access_enable(struct amdgpu_device *adev, bool enable)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 36b1ca73c2ed..a1443990d5c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -2361,11 +2361,15 @@ static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
- if ((adev->gfx.mec_fw_version >= 0xb0) && amdgpu_dpm_reset_sdma_is_supported(adev))
+ if ((adev->gfx.mec_fw_version >= 0xb0) &&
+ amdgpu_dpm_reset_sdma_is_supported(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
case IP_VERSION(9, 5, 0):
- if ((adev->gfx.mec_fw_version >= 0xf) && amdgpu_dpm_reset_sdma_is_supported(adev))
+ if ((adev->gfx.mec_fw_version >= 0xf) &&
+ amdgpu_dpm_reset_sdma_is_supported(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 7dc67a22a7a0..8ddc4df06a1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1429,7 +1429,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(5, 0, 2):
case IP_VERSION(5, 0, 5):
if ((adev->sdma.instance[0].fw_version >= 35) &&
- !amdgpu_sriov_vf(adev))
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 3bd44c24f692..51101b0aa2fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -342,7 +342,7 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
if (ring->me > 1) {
- amdgpu_asic_flush_hdp(adev, ring);
+ amdgpu_hdp_flush(adev, ring);
} else {
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
@@ -1348,12 +1348,14 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(5, 2, 3):
case IP_VERSION(5, 2, 4):
if ((adev->sdma.instance[0].fw_version >= 76) &&
- !amdgpu_sriov_vf(adev))
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
case IP_VERSION(5, 2, 5):
if ((adev->sdma.instance[0].fw_version >= 34) &&
- !amdgpu_sriov_vf(adev))
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index db6e41967f12..217040044987 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1356,7 +1356,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(6, 0, 2):
case IP_VERSION(6, 0, 3):
if ((adev->sdma.instance[0].fw_version >= 21) &&
- !amdgpu_sriov_vf(adev))
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:
@@ -1389,7 +1390,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
break;
case IP_VERSION(6, 0, 3):
- if ((adev->sdma.instance[0].fw_version >= 27) && !adev->sdma.disable_uq)
+ if (adev->sdma.instance[0].fw_version >= 29 && !adev->sdma.disable_uq)
adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
break;
case IP_VERSION(6, 1, 0):
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 326ecc8d37d2..2b81344dcd66 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1337,7 +1337,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->sdma.supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
r = amdgpu_sdma_sysfs_reset_mask_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index e0f139de7991..f7288372ee61 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -45,6 +45,7 @@
#include "dce_v6_0.h"
#include "si.h"
#include "uvd_v3_1.h"
+#include "vce_v1_0.h"
#include "uvd/uvd_4_0_d.h"
@@ -921,8 +922,6 @@ static const u32 hainan_mgcg_cgcg_init[] =
0x3630, 0xfffffff0, 0x00000100,
};
-/* XXX: update when we support VCE */
-#if 0
/* tahiti, pitcairn, verde */
static const struct amdgpu_video_codec_info tahiti_video_codecs_encode_array[] =
{
@@ -940,13 +939,7 @@ static const struct amdgpu_video_codecs tahiti_video_codecs_encode =
.codec_count = ARRAY_SIZE(tahiti_video_codecs_encode_array),
.codec_array = tahiti_video_codecs_encode_array,
};
-#else
-static const struct amdgpu_video_codecs tahiti_video_codecs_encode =
-{
- .codec_count = 0,
- .codec_array = NULL,
-};
-#endif
+
/* oland and hainan don't support encode */
static const struct amdgpu_video_codecs hainan_video_codecs_encode =
{
@@ -1925,6 +1918,14 @@ static int si_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
~VCEPLL_BYPASS_EN_MASK);
if (!evclk || !ecclk) {
+ /*
+ * On some chips, the PLL takes way too long to get out of
+ * sleep mode, causing a timeout waiting on CTLACK/CTLACK2.
+ * Leave the PLL running in bypass mode.
+ */
+ if (adev->pdev->device == 0x6780)
+ return 0;
+
/* Keep the Bypass mode, put PLL to sleep */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
~VCEPLL_SLEEP_MASK);
@@ -2717,7 +2718,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
else
amdgpu_device_ip_block_add(adev, &dce_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block);
- /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
+ amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block);
break;
case CHIP_OLAND:
amdgpu_device_ip_block_add(adev, &si_common_ip_block);
@@ -2735,7 +2736,6 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
else
amdgpu_device_ip_block_add(adev, &dce_v6_4_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block);
- /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
break;
case CHIP_HAINAN:
amdgpu_device_ip_block_add(adev, &si_common_ip_block);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 1df00f8a2406..66f650f87243 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -96,6 +96,9 @@ static int si_ih_irq_init(struct amdgpu_device *adev)
pci_set_master(adev->pdev);
si_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -112,6 +115,9 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
@@ -127,6 +133,8 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(IH_RB_CNTL, tmp);
}
+
+out:
return (wptr & ih->ptr_mask);
}
@@ -175,6 +183,10 @@ static int si_ih_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
return amdgpu_irq_init(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h
index cbd4f8951cfa..561462a8332e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sid.h
+++ b/drivers/gpu/drm/amd/amdgpu/sid.h
@@ -582,45 +582,6 @@
#define DMA_PACKET_NOP 0xf
/* VCE */
-#define VCE_STATUS 0x20004
-#define VCE_VCPU_CNTL 0x20014
-#define VCE_CLK_EN (1 << 0)
-#define VCE_VCPU_CACHE_OFFSET0 0x20024
-#define VCE_VCPU_CACHE_SIZE0 0x20028
-#define VCE_VCPU_CACHE_OFFSET1 0x2002c
-#define VCE_VCPU_CACHE_SIZE1 0x20030
-#define VCE_VCPU_CACHE_OFFSET2 0x20034
-#define VCE_VCPU_CACHE_SIZE2 0x20038
-#define VCE_SOFT_RESET 0x20120
-#define VCE_ECPU_SOFT_RESET (1 << 0)
-#define VCE_FME_SOFT_RESET (1 << 2)
-#define VCE_RB_BASE_LO2 0x2016c
-#define VCE_RB_BASE_HI2 0x20170
-#define VCE_RB_SIZE2 0x20174
-#define VCE_RB_RPTR2 0x20178
-#define VCE_RB_WPTR2 0x2017c
-#define VCE_RB_BASE_LO 0x20180
-#define VCE_RB_BASE_HI 0x20184
-#define VCE_RB_SIZE 0x20188
-#define VCE_RB_RPTR 0x2018c
-#define VCE_RB_WPTR 0x20190
-#define VCE_CLOCK_GATING_A 0x202f8
-#define VCE_CLOCK_GATING_B 0x202fc
-#define VCE_UENC_CLOCK_GATING 0x205bc
-#define VCE_UENC_REG_CLOCK_GATING 0x205c0
-#define VCE_FW_REG_STATUS 0x20e10
-# define VCE_FW_REG_STATUS_BUSY (1 << 0)
-# define VCE_FW_REG_STATUS_PASS (1 << 3)
-# define VCE_FW_REG_STATUS_DONE (1 << 11)
-#define VCE_LMI_FW_START_KEYSEL 0x20e18
-#define VCE_LMI_FW_PERIODIC_CTRL 0x20e20
-#define VCE_LMI_CTRL2 0x20e74
-#define VCE_LMI_CTRL 0x20e98
-#define VCE_LMI_VM_CTRL 0x20ea0
-#define VCE_LMI_SWAP_CNTL 0x20eb4
-#define VCE_LMI_SWAP_CNTL1 0x20eb8
-#define VCE_LMI_CACHE_CTRL 0x20ef4
-
#define VCE_CMD_NO_OP 0x00000000
#define VCE_CMD_END 0x00000001
#define VCE_CMD_IB 0x00000002
@@ -629,7 +590,6 @@
#define VCE_CMD_IB_AUTO 0x00000005
#define VCE_CMD_SEMAPHORE 0x00000006
-
//#dce stupp
/* display controller offsets used for crtc/cur/lut/grph/viewport/etc. */
#define CRTC0_REGISTER_OFFSET (0x1b7c - 0x1b7c) //(0x6df0 - 0x6df0)/4
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 9785fada4fa7..42f5d9c0e3af 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -853,10 +853,6 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
{
u32 sol_reg;
- /* CP hangs in IGT reloading test on RN, reset to WA */
- if (adev->asic_type == CHIP_RENOIR)
- return true;
-
if (amdgpu_gmc_need_reset_on_init(adev))
return true;
if (amdgpu_psp_tos_reload_needed(adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 7d17ae56f901..ee8038df17e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -159,6 +159,9 @@ static int tonga_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
tonga_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -196,6 +199,9 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -306,6 +312,10 @@ static int tonga_ih_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih;
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 8dc32787d625..0f5b1719fda5 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -711,6 +711,19 @@ static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev,
return die;
}
+static void umc_v12_0_mca_ipid_parse(struct amdgpu_device *adev, uint64_t ipid,
+ uint32_t *did, uint32_t *ch, uint32_t *umc_inst, uint32_t *sid)
+{
+ if (did)
+ *did = MCA_IPID_2_DIE_ID(ipid);
+ if (ch)
+ *ch = MCA_IPID_2_UMC_CH(ipid);
+ if (umc_inst)
+ *umc_inst = MCA_IPID_2_UMC_INST(ipid);
+ if (sid)
+ *sid = MCA_IPID_2_SOCKET_ID(ipid);
+}
+
struct amdgpu_umc_ras umc_v12_0_ras = {
.ras_block = {
.hw_ops = &umc_v12_0_ras_hw_ops,
@@ -724,5 +737,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
.convert_ras_err_addr = umc_v12_0_convert_error_address,
.get_die_id_from_pa = umc_v12_0_get_die_id,
.get_retire_flip_bits = umc_v12_0_get_retire_flip_bits,
+ .mca_ipid_parse = umc_v12_0_mca_ipid_parse,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
new file mode 100644
index 000000000000..9ae424618556
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
@@ -0,0 +1,839 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * Copyright 2025 Valve Corporation
+ * Copyright 2025 Alexandre Demers
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ * Timur Kristóf <timur.kristof@gmail.com>
+ * Alexandre Demers <alexandre.f.demers@gmail.com>
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_gart.h"
+#include "sid.h"
+#include "vce_v1_0.h"
+#include "vce/vce_1_0_d.h"
+#include "vce/vce_1_0_sh_mask.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
+
+#define VCE_V1_0_FW_SIZE (256 * 1024)
+#define VCE_V1_0_STACK_SIZE (64 * 1024)
+#define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1))
+#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
+
+#define VCE_V1_0_GART_PAGE_START \
+ (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS)
+#define VCE_V1_0_GART_ADDR_START \
+ (VCE_V1_0_GART_PAGE_START * AMDGPU_GPU_PAGE_SIZE)
+
+static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev);
+static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+
+struct vce_v1_0_fw_signature {
+ int32_t offset;
+ uint32_t length;
+ int32_t number;
+ struct {
+ uint32_t chip_id;
+ uint32_t keyselect;
+ uint32_t nonce[4];
+ uint32_t sigval[4];
+ } val[8];
+};
+
+/**
+ * vce_v1_0_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vce_v1_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->me == 0)
+ return RREG32(mmVCE_RB_RPTR);
+ else
+ return RREG32(mmVCE_RB_RPTR2);
+}
+
+/**
+ * vce_v1_0_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vce_v1_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->me == 0)
+ return RREG32(mmVCE_RB_WPTR);
+ else
+ return RREG32(mmVCE_RB_WPTR2);
+}
+
+/**
+ * vce_v1_0_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vce_v1_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->me == 0)
+ WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
+ else
+ WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
+}
+
+static int vce_v1_0_lmi_clean(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ if (RREG32(mmVCE_LMI_STATUS) & 0x337f)
+ return 0;
+
+ mdelay(10);
+ }
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int vce_v1_0_firmware_loaded(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ if (RREG32(mmVCE_STATUS) & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
+ return 0;
+ mdelay(10);
+ }
+
+ dev_err(adev->dev, "VCE not responding, trying to reset the ECPU\n");
+
+ WREG32_P(mmVCE_SOFT_RESET,
+ VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
+ ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(mmVCE_SOFT_RESET, 0,
+ ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+ mdelay(10);
+ }
+
+ return -ETIMEDOUT;
+}
+
+static void vce_v1_0_init_cg(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32(mmVCE_CLOCK_GATING_A);
+ tmp |= VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
+ WREG32(mmVCE_CLOCK_GATING_A, tmp);
+
+ tmp = RREG32(mmVCE_CLOCK_GATING_B);
+ tmp |= 0x1e;
+ tmp &= ~0xe100e1;
+ WREG32(mmVCE_CLOCK_GATING_B, tmp);
+
+ tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+ tmp &= ~0xff9ff000;
+ WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ tmp &= ~0x3ff;
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+}
+
+/**
+ * vce_v1_0_load_fw_signature - load firmware signature into VCPU BO
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The VCE1 firmware validation mechanism needs a firmware signature.
+ * This function finds the signature appropriate for the current
+ * ASIC and writes that into the VCPU BO.
+ */
+static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev)
+{
+ const struct common_firmware_header *hdr;
+ struct vce_v1_0_fw_signature *sign;
+ unsigned int ucode_offset;
+ uint32_t chip_id;
+ u32 *cpu_addr;
+ int i;
+
+ hdr = (const struct common_firmware_header *)adev->vce.fw->data;
+ ucode_offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ cpu_addr = adev->vce.cpu_addr;
+
+ sign = (void *)adev->vce.fw->data + ucode_offset;
+
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ chip_id = 0x01000014;
+ break;
+ case CHIP_VERDE:
+ chip_id = 0x01000015;
+ break;
+ case CHIP_PITCAIRN:
+ chip_id = 0x01000016;
+ break;
+ default:
+ dev_err(adev->dev, "asic_type %#010x was not found!", adev->asic_type);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < le32_to_cpu(sign->number); ++i) {
+ if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
+ break;
+ }
+
+ if (i == le32_to_cpu(sign->number)) {
+ dev_err(adev->dev, "chip_id 0x%x for %s was not found in VCE firmware",
+ chip_id, amdgpu_asic_name[adev->asic_type]);
+ return -EINVAL;
+ }
+
+ cpu_addr += (256 - 64) / 4;
+ memcpy_toio(&cpu_addr[0], &sign->val[i].nonce[0], 16);
+ cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64);
+
+ memset_io(&cpu_addr[5], 0, 44);
+ memcpy_toio(&cpu_addr[16], &sign[1], hdr->ucode_size_bytes - sizeof(*sign));
+
+ cpu_addr += (le32_to_cpu(sign->length) + 64) / 4;
+ memcpy_toio(&cpu_addr[0], &sign->val[i].sigval[0], 16);
+
+ adev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
+
+ return 0;
+}
+
+static int vce_v1_0_wait_for_fw_validation(struct amdgpu_device *adev)
+{
+ int i;
+
+ dev_dbg(adev->dev, "VCE keyselect: %d", adev->vce.keyselect);
+ WREG32(mmVCE_LMI_FW_START_KEYSEL, adev->vce.keyselect);
+
+ for (i = 0; i < 10; ++i) {
+ mdelay(10);
+ if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK)
+ break;
+ }
+
+ if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK)) {
+ dev_err(adev->dev, "VCE FW validation timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__PASS_MASK)) {
+ dev_err(adev->dev, "VCE FW validation failed\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < 10; ++i) {
+ mdelay(10);
+ if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK))
+ break;
+ }
+
+ if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK) {
+ dev_err(adev->dev, "VCE FW busy timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static int vce_v1_0_mc_resume(struct amdgpu_device *adev)
+{
+ uint32_t offset;
+ uint32_t size;
+
+ /*
+ * When the keyselect is already set, don't perturb VCE FW.
+ * Validation seems to always fail the second time.
+ */
+ if (RREG32(mmVCE_LMI_FW_START_KEYSEL)) {
+ dev_dbg(adev->dev, "keyselect already set: 0x%x (on CPU: 0x%x)\n",
+ RREG32(mmVCE_LMI_FW_START_KEYSEL), adev->vce.keyselect);
+
+ WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
+ return 0;
+ }
+
+ WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
+ WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
+ WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
+ WREG32(mmVCE_CLOCK_GATING_B, 0);
+
+ WREG32_P(mmVCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4);
+
+ WREG32(mmVCE_LMI_CTRL, 0x00398000);
+
+ WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
+ WREG32(mmVCE_LMI_SWAP_CNTL, 0);
+ WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
+ WREG32(mmVCE_LMI_VM_CTRL, 0);
+
+ WREG32(mmVCE_VCPU_SCRATCH7, AMDGPU_MAX_VCE_HANDLES);
+
+ offset = adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET;
+ size = VCE_V1_0_FW_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
+
+ offset += size;
+ size = VCE_V1_0_STACK_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
+
+ offset += size;
+ size = VCE_V1_0_DATA_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
+
+ WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
+
+ return vce_v1_0_wait_for_fw_validation(adev);
+}
+
+/**
+ * vce_v1_0_is_idle() - Check idle status of VCE1 IP block
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ *
+ * Check whether VCE is busy according to VCE_STATUS.
+ * Also check whether the SRBM thinks VCE is busy, although
+ * SRBM_STATUS.VCE_BUSY seems to be bogus because it
+ * appears to mirror the VCE_STATUS.VCPU_REPORT_FW_LOADED bit.
+ */
+static bool vce_v1_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool busy =
+ (RREG32(mmVCE_STATUS) & (VCE_STATUS__JOB_BUSY_MASK | VCE_STATUS__UENC_BUSY_MASK)) ||
+ (RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);
+
+ return !busy;
+}
+
+static int vce_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ unsigned int i;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ udelay(1);
+ if (vce_v1_0_is_idle(ip_block))
+ return 0;
+ }
+ return -ETIMEDOUT;
+}
+
+/**
+ * vce_v1_0_start - start VCE block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the VCE block
+ */
+static int vce_v1_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ WREG32_P(mmVCE_STATUS, 1, ~1);
+
+ r = vce_v1_0_mc_resume(adev);
+ if (r)
+ return r;
+
+ ring = &adev->vce.ring[0];
+ WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
+
+ ring = &adev->vce.ring[1];
+ WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_BASE_LO2, lower_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
+
+ WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
+ ~VCE_VCPU_CNTL__CLK_EN_MASK);
+
+ WREG32_P(mmVCE_SOFT_RESET,
+ VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK,
+ ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK));
+
+ mdelay(100);
+
+ WREG32_P(mmVCE_SOFT_RESET, 0,
+ ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK));
+
+ r = vce_v1_0_firmware_loaded(adev);
+
+ /* Clear VCE_STATUS, otherwise SRBM thinks VCE1 is busy. */
+ WREG32(mmVCE_STATUS, 0);
+
+ if (r) {
+ dev_err(adev->dev, "VCE not responding, giving up\n");
+ return r;
+ }
+
+ return 0;
+}
+
+static int vce_v1_0_stop(struct amdgpu_device *adev)
+{
+ struct amdgpu_ip_block *ip_block;
+ int status;
+ int i;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE);
+ if (!ip_block)
+ return -EINVAL;
+
+ if (vce_v1_0_lmi_clean(adev))
+ dev_warn(adev->dev, "VCE not idle\n");
+
+ if (vce_v1_0_wait_for_idle(ip_block))
+ dev_warn(adev->dev, "VCE busy: VCE_STATUS=0x%x, SRBM_STATUS2=0x%x\n",
+ RREG32(mmVCE_STATUS), RREG32(mmSRBM_STATUS2));
+
+ /* Stall UMC and register bus before resetting VCPU */
+ WREG32_P(mmVCE_LMI_CTRL2, 1 << 8, ~(1 << 8));
+
+ for (i = 0; i < 100; ++i) {
+ status = RREG32(mmVCE_LMI_STATUS);
+ if (status & 0x240)
+ break;
+ mdelay(1);
+ }
+
+ WREG32_P(mmVCE_VCPU_CNTL, 0, ~VCE_VCPU_CNTL__CLK_EN_MASK);
+
+ WREG32_P(mmVCE_SOFT_RESET,
+ VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK,
+ ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK));
+
+ WREG32(mmVCE_STATUS, 0);
+
+ return 0;
+}
+
+static void vce_v1_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
+{
+ u32 tmp;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) {
+ tmp = RREG32(mmVCE_CLOCK_GATING_A);
+ tmp |= VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
+ WREG32(mmVCE_CLOCK_GATING_A, tmp);
+
+ tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+ tmp &= ~0x1ff000;
+ tmp |= 0xff800000;
+ WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ tmp &= ~0x3ff;
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+ } else {
+ tmp = RREG32(mmVCE_CLOCK_GATING_A);
+ tmp &= ~VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
+ WREG32(mmVCE_CLOCK_GATING_A, tmp);
+
+ tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+ tmp |= 0x1ff000;
+ tmp &= ~0xff800000;
+ WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ tmp |= 0x3ff;
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+ }
+}
+
+static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_early_init(adev);
+ if (r)
+ return r;
+
+ adev->vce.num_rings = 2;
+
+ vce_v1_0_set_ring_funcs(adev);
+ vce_v1_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * vce_v1_0_ensure_vcpu_bo_32bit_addr() - ensure the VCPU BO has a 32-bit address
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Due to various hardware limitations, the VCE1 requires
+ * the VCPU BO to be in the low 32 bit address range.
+ * Ensure that the VCPU BO has a 32-bit GPU address,
+ * or return an error code when that isn't possible.
+ *
+ * To accomodate that, we put GART to the LOW address range
+ * and reserve some GART pages where we map the VCPU BO,
+ * so that it gets a 32-bit address.
+ */
+static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
+{
+ u64 gpu_addr = amdgpu_bo_gpu_offset(adev->vce.vcpu_bo);
+ u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo);
+ u64 max_vcpu_bo_addr = 0xffffffff - bo_size;
+ u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE;
+ u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo);
+ u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID;
+
+ /*
+ * Check if the VCPU BO already has a 32-bit address.
+ * Eg. if MC is configured to put VRAM in the low address range.
+ */
+ if (gpu_addr <= max_vcpu_bo_addr)
+ return 0;
+
+ /* Check if we can map the VCPU BO in GART to a 32-bit address. */
+ if (adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START > max_vcpu_bo_addr)
+ return -EINVAL;
+
+ amdgpu_gart_map_vram_range(adev, pa, VCE_V1_0_GART_PAGE_START,
+ num_pages, flags, adev->gart.ptr);
+ adev->vce.gpu_addr = adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START;
+ if (adev->vce.gpu_addr > max_vcpu_bo_addr)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int r, i;
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq);
+ if (r)
+ return r;
+
+ r = amdgpu_vce_sw_init(adev, VCE_V1_0_FW_SIZE +
+ VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE);
+ if (r)
+ return r;
+
+ r = amdgpu_vce_resume(adev);
+ if (r)
+ return r;
+ r = vce_v1_0_load_fw_signature(adev);
+ if (r)
+ return r;
+ r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
+
+ ring = &adev->vce.ring[i];
+ sprintf(ring->name, "vce%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
+ hw_prio, NULL);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static int vce_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_suspend(adev);
+ if (r)
+ return r;
+
+ return amdgpu_vce_sw_fini(adev);
+}
+
+/**
+ * vce_v1_0_hw_init - start and test VCE block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vce_v1_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vce(adev, true);
+ else
+ amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
+ if (r)
+ return r;
+ }
+
+ dev_info(adev->dev, "VCE initialized successfully.\n");
+
+ return 0;
+}
+
+static int vce_v1_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = vce_v1_0_stop(ip_block->adev);
+ if (r)
+ return r;
+
+ cancel_delayed_work_sync(&ip_block->adev->vce.idle_work);
+ return 0;
+}
+
+static int vce_v1_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_vce(adev, false);
+ } else {
+ amdgpu_asic_set_vce_clocks(adev, 0, 0);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = vce_v1_0_hw_fini(ip_block);
+ if (r) {
+ dev_err(adev->dev, "vce_v1_0_hw_fini() failed with error %i", r);
+ return r;
+ }
+
+ return amdgpu_vce_suspend(adev);
+}
+
+static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_resume(adev);
+ if (r)
+ return r;
+ r = vce_v1_0_load_fw_signature(adev);
+ if (r)
+ return r;
+ r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
+ if (r)
+ return r;
+
+ return vce_v1_0_hw_init(ip_block);
+}
+
+static int vce_v1_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t val = 0;
+
+ if (state == AMDGPU_IRQ_STATE_ENABLE)
+ val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
+
+ WREG32_P(mmVCE_SYS_INT_EN, val,
+ ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
+ return 0;
+}
+
+static int vce_v1_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg(adev->dev, "IH: VCE\n");
+ switch (entry->src_data[0]) {
+ case 0:
+ case 1:
+ amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
+ break;
+ default:
+ dev_err(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int vce_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ vce_v1_0_init_cg(adev);
+ vce_v1_0_enable_mgcg(adev, state == AMD_CG_STATE_GATE);
+
+ return 0;
+}
+
+static int vce_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /*
+ * This doesn't actually powergate the VCE block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ if (state == AMD_PG_STATE_GATE)
+ return vce_v1_0_stop(adev);
+ else
+ return vce_v1_0_start(adev);
+}
+
+static const struct amd_ip_funcs vce_v1_0_ip_funcs = {
+ .name = "vce_v1_0",
+ .early_init = vce_v1_0_early_init,
+ .sw_init = vce_v1_0_sw_init,
+ .sw_fini = vce_v1_0_sw_fini,
+ .hw_init = vce_v1_0_hw_init,
+ .hw_fini = vce_v1_0_hw_fini,
+ .suspend = vce_v1_0_suspend,
+ .resume = vce_v1_0_resume,
+ .is_idle = vce_v1_0_is_idle,
+ .wait_for_idle = vce_v1_0_wait_for_idle,
+ .set_clockgating_state = vce_v1_0_set_clockgating_state,
+ .set_powergating_state = vce_v1_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs vce_v1_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_VCE,
+ .align_mask = 0xf,
+ .nop = VCE_CMD_NO_OP,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = vce_v1_0_ring_get_rptr,
+ .get_wptr = vce_v1_0_ring_get_wptr,
+ .set_wptr = vce_v1_0_ring_set_wptr,
+ .parse_cs = amdgpu_vce_ring_parse_cs,
+ .emit_frame_size = 6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
+ .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
+ .emit_ib = amdgpu_vce_ring_emit_ib,
+ .emit_fence = amdgpu_vce_ring_emit_fence,
+ .test_ring = amdgpu_vce_ring_test_ring,
+ .test_ib = amdgpu_vce_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vce_ring_begin_use,
+ .end_use = amdgpu_vce_ring_end_use,
+};
+
+static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ adev->vce.ring[i].funcs = &vce_v1_0_ring_funcs;
+ adev->vce.ring[i].me = i;
+ }
+};
+
+static const struct amdgpu_irq_src_funcs vce_v1_0_irq_funcs = {
+ .set = vce_v1_0_set_interrupt_state,
+ .process = vce_v1_0_process_interrupt,
+};
+
+static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->vce.irq.num_types = 1;
+ adev->vce.irq.funcs = &vce_v1_0_irq_funcs;
+};
+
+const struct amdgpu_ip_block_version vce_v1_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCE,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vce_v1_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.h
new file mode 100644
index 000000000000..206e7bec897f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * Copyright 2025 Valve Corporation
+ * Copyright 2025 Alexandre Demers
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCE_V1_0_H__
+#define __VCE_V1_0_H__
+
+extern const struct amdgpu_ip_block_version vce_v1_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index bee3e904a6bc..8ea8a6193492 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -407,6 +407,11 @@ static void vce_v2_0_enable_mgcg(struct amdgpu_device *adev, bool enable,
static int vce_v2_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_early_init(adev);
+ if (r)
+ return r;
adev->vce.num_rings = 2;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 708123899c41..719e9643c43d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -399,6 +399,7 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
static int vce_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
+ int r;
adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);
@@ -407,6 +408,10 @@ static int vce_v3_0_early_init(struct amdgpu_ip_block *ip_block)
(AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
return -ENOENT;
+ r = amdgpu_vce_early_init(adev);
+ if (r)
+ return r;
+
adev->vce.num_rings = 3;
vce_v3_0_set_ring_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 335bda64ff5b..2d64002bed61 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -410,6 +410,11 @@ static int vce_v4_0_stop(struct amdgpu_device *adev)
static int vce_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_early_init(adev);
+ if (r)
+ return r;
if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
adev->vce.num_rings = 1;