From bd607166af7fe31f8d8e9c575f4561a4b56b9f24 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Fri, 13 Mar 2020 09:21:55 -0400 Subject: drm/amdgpu: Enable reading FRU chip via I2C v3 Allow for reading of information like manufacturer, product number and serial number from the FRU chip. Report the serial number as the new sysfs file serial_number. Note that this only works on server cards, as consumer cards do not feature the FRU chip, which contains this information. v2: Add documentation to amdgpu.rst, add helper functions, rename functions for consistency, fix bad starting offset v3: Remove testing definitions Signed-off-by: Kent Russell Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 90 ++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index faa3e7102156..f422ef58b4d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -64,6 +64,7 @@ #include "amdgpu_xgmi.h" #include "amdgpu_ras.h" #include "amdgpu_pmu.h" +#include "amdgpu_fru_eeprom.h" #include #include @@ -137,6 +138,72 @@ static DEVICE_ATTR(pcie_replay_count, S_IRUGO, static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); +/** + * DOC: product_name + * + * The amdgpu driver provides a sysfs API for reporting the product name + * for the device + * The file serial_number is used for this and returns the product name + * as returned from the FRU. + * NOTE: This is only available for certain server cards + */ + +static ssize_t amdgpu_device_get_product_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name); +} + +static DEVICE_ATTR(product_name, S_IRUGO, + amdgpu_device_get_product_name, NULL); + +/** + * DOC: product_number + * + * The amdgpu driver provides a sysfs API for reporting the part number + * for the device + * The file serial_number is used for this and returns the part number + * as returned from the FRU. + * NOTE: This is only available for certain server cards + */ + +static ssize_t amdgpu_device_get_product_number(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number); +} + +static DEVICE_ATTR(product_number, S_IRUGO, + amdgpu_device_get_product_number, NULL); + +/** + * DOC: serial_number + * + * The amdgpu driver provides a sysfs API for reporting the serial number + * for the device + * The file serial_number is used for this and returns the serial number + * as returned from the FRU. + * NOTE: This is only available for certain server cards + */ + +static ssize_t amdgpu_device_get_serial_number(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial); +} + +static DEVICE_ATTR(serial_number, S_IRUGO, + amdgpu_device_get_serial_number, NULL); + /** * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control * @@ -1975,6 +2042,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev); + amdgpu_fru_get_product_info(adev); + init_failed: if (amdgpu_sriov_vf(adev)) amdgpu_virt_release_full_gpu(adev, true); @@ -3189,6 +3258,24 @@ fence_driver_init: return r; } + r = device_create_file(adev->dev, &dev_attr_product_name); + if (r) { + dev_err(adev->dev, "Could not create product_name"); + return r; + } + + r = device_create_file(adev->dev, &dev_attr_product_number); + if (r) { + dev_err(adev->dev, "Could not create product_number"); + return r; + } + + r = device_create_file(adev->dev, &dev_attr_serial_number); + if (r) { + dev_err(adev->dev, "Could not create serial_number"); + return r; + } + if (IS_ENABLED(CONFIG_PERF_EVENTS)) r = amdgpu_pmu_init(adev); if (r) @@ -3271,6 +3358,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pcie_replay_count); if (adev->ucode_sysfs_en) amdgpu_ucode_sysfs_fini(adev); + device_remove_file(adev->dev, &dev_attr_product_name); + device_remove_file(adev->dev, &dev_attr_product_number); + device_remove_file(adev->dev, &dev_attr_serial_number); if (IS_ENABLED(CONFIG_PERF_EVENTS)) amdgpu_pmu_fini(adev); if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) -- cgit v1.2.3 From 3aa0115d238c71423d0e212138678a8cf51d4361 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 4 Mar 2020 14:02:55 +0800 Subject: drm/amdgpu: cleanup all virtualization detection routine we need to move virt detection much earlier because: 1) HW team confirms us that RCC_IOV_FUNC_IDENTIFIER will always be at DE5 (dw) mmio offset from vega10, this way there is no need to implement detect_hw_virt() routine in each nbio/chip file. for VI SRIOV chip (tonga & fiji), the BIF_IOV_FUNC_IDENTIFIER is at 0x1503 2) we need to acknowledged we are SRIOV VF before we do IP discovery because the IP discovery content will be updated by host everytime after it recieved a new coming "REQ_GPU_INIT_DATA" request from guest (there will be patches for this new handshake soon). Signed-off-by: Monk Liu Reviewed-by: Emily Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 33 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 6 ++++ drivers/gpu/drm/amd/amdgpu/cik.c | 8 ------ drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 18 ------------ drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 18 ------------ drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 7 ----- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 18 ------------ drivers/gpu/drm/amd/amdgpu/nv.c | 2 -- drivers/gpu/drm/amd/amdgpu/si.c | 8 ------ drivers/gpu/drm/amd/amdgpu/soc15.c | 1 - drivers/gpu/drm/amd/amdgpu/vi.c | 24 ---------------- .../amd/include/asic_reg/nbif/nbif_6_1_offset.h | 2 ++ .../amd/include/asic_reg/nbio/nbio_7_0_offset.h | 2 ++ .../amd/include/asic_reg/nbio/nbio_7_4_offset.h | 2 ++ 16 files changed, 48 insertions(+), 105 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f422ef58b4d8..449720086fbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3055,6 +3055,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10) adev->enable_mes = true; + /* detect hw virtualization here */ + amdgpu_detect_virtualization(adev); + if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) { r = amdgpu_discovery_init(adev); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 919bd566ba3c..edaac242ff85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -77,7 +77,6 @@ struct amdgpu_nbio_funcs { u32 *flags); void (*ih_control)(struct amdgpu_device *adev); void (*init_registers)(struct amdgpu_device *adev); - void (*detect_hw_virt)(struct amdgpu_device *adev); void (*remap_hdp_registers)(struct amdgpu_device *adev); void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev); void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index adc813cde8e2..43a1ee332727 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -287,3 +287,36 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) } } } + +void amdgpu_detect_virtualization(struct amdgpu_device *adev) +{ + uint32_t reg; + + switch (adev->asic_type) { + case CHIP_TONGA: + case CHIP_FIJI: + reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER); + break; + case CHIP_VEGA10: + case CHIP_VEGA20: + case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_ARCTURUS: + reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER); + break; + default: /* other chip doesn't support SRIOV */ + reg = 0; + break; + } + + if (reg & 1) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; + + if (reg & 0x80000000) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; + + if (!reg) { + if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ + adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 0a95b137eadb..74f9843fce82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -30,6 +30,11 @@ #define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* thw whole GPU is pass through for VM */ #define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */ +/* all asic after AI use this offset */ +#define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5 +/* tonga/fiji use this offset */ +#define mmBIF_IOV_FUNC_IDENTIFIER 0x1503 + struct amdgpu_mm_table { struct amdgpu_bo *bo; uint32_t *cpu_addr; @@ -305,4 +310,5 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, unsigned int key, unsigned int chksum); void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); +void amdgpu_detect_virtualization(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 006f21ef7ddf..db68ffa27984 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1811,12 +1811,6 @@ static uint32_t cik_get_rev_id(struct amdgpu_device *adev) >> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT; } -static void cik_detect_hw_virtualization(struct amdgpu_device *adev) -{ - if (is_virtual_machine()) /* passthrough mode */ - adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; -} - static void cik_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) { @@ -2179,8 +2173,6 @@ static const struct amdgpu_ip_block_version cik_common_ip_block = int cik_set_ip_blocks(struct amdgpu_device *adev) { - cik_detect_hw_virtualization(adev); - switch (adev->asic_type) { case CHIP_BONAIRE: amdgpu_device_ip_block_add(adev, &cik_common_ip_block); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index f3a3fe746222..cbcf04578b99 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -290,23 +290,6 @@ const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = { .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -static void nbio_v2_3_detect_hw_virt(struct amdgpu_device *adev) -{ - uint32_t reg; - - reg = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_RCC_IOV_FUNC_IDENTIFIER); - if (reg & 1) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; - - if (reg & 0x80000000) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; - - if (!reg) { - if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ - adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; - } -} - static void nbio_v2_3_init_registers(struct amdgpu_device *adev) { uint32_t def, data; @@ -338,6 +321,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .get_clockgating_state = nbio_v2_3_get_clockgating_state, .ih_control = nbio_v2_3_ih_control, .init_registers = nbio_v2_3_init_registers, - .detect_hw_virt = nbio_v2_3_detect_hw_virt, .remap_hdp_registers = nbio_v2_3_remap_hdp_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 635d9e1fc0a3..7b2fb050407d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -241,23 +241,6 @@ const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK }; -static void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev) -{ - uint32_t reg; - - reg = RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_IOV_FUNC_IDENTIFIER); - if (reg & 1) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; - - if (reg & 0x80000000) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; - - if (!reg) { - if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ - adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; - } -} - static void nbio_v6_1_init_registers(struct amdgpu_device *adev) { uint32_t def, data; @@ -294,5 +277,4 @@ const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { .get_clockgating_state = nbio_v6_1_get_clockgating_state, .ih_control = nbio_v6_1_ih_control, .init_registers = nbio_v6_1_init_registers, - .detect_hw_virt = nbio_v6_1_detect_hw_virt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index d6cbf26074bc..d34628e113fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -280,12 +280,6 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = { .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -static void nbio_v7_0_detect_hw_virt(struct amdgpu_device *adev) -{ - if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ - adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; -} - static void nbio_v7_0_init_registers(struct amdgpu_device *adev) { @@ -310,6 +304,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { .get_clockgating_state = nbio_v7_0_get_clockgating_state, .ih_control = nbio_v7_0_ih_control, .init_registers = nbio_v7_0_init_registers, - .detect_hw_virt = nbio_v7_0_detect_hw_virt, .remap_hdp_registers = nbio_v7_0_remap_hdp_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 149d386590df..41c53c149852 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -292,23 +292,6 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, }; -static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev) -{ - uint32_t reg; - - reg = RREG32_SOC15(NBIO, 0, mmRCC_IOV_FUNC_IDENTIFIER); - if (reg & 1) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; - - if (reg & 0x80000000) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; - - if (!reg) { - if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ - adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; - } -} - static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { @@ -561,7 +544,6 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .get_clockgating_state = nbio_v7_4_get_clockgating_state, .ih_control = nbio_v7_4_ih_control, .init_registers = nbio_v7_4_init_registers, - .detect_hw_virt = nbio_v7_4_detect_hw_virt, .remap_hdp_registers = nbio_v7_4_remap_hdp_registers, .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring, .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 033cbbca2072..a67d78d7eeeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -465,8 +465,6 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.funcs = &nbio_v2_3_funcs; adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - adev->nbio.funcs->detect_hw_virt(adev); - if (amdgpu_sriov_vf(adev)) adev->virt.ops = &xgpu_nv_virt_ops; diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 4d415bfdb42f..153db3f763bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1249,12 +1249,6 @@ static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) return 0; } -static void si_detect_hw_virtualization(struct amdgpu_device *adev) -{ - if (is_virtual_machine()) /* passthrough mode */ - adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; -} - static void si_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) { @@ -2165,8 +2159,6 @@ static const struct amdgpu_ip_block_version si_common_ip_block = int si_set_ip_blocks(struct amdgpu_device *adev) { - si_detect_hw_virtualization(adev); - switch (adev->asic_type) { case CHIP_VERDE: case CHIP_TAHITI: diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index a40499d51c93..a8c90d83a9ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -712,7 +712,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) adev->df.funcs = &df_v1_7_funcs; adev->rev_id = soc15_get_rev_id(adev); - adev->nbio.funcs->detect_hw_virt(adev); if (amdgpu_sriov_vf(adev)) adev->virt.ops = &xgpu_ai_virt_ops; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 78b35901643b..0a90c296409b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -448,27 +448,6 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev, return true; } -static void vi_detect_hw_virtualization(struct amdgpu_device *adev) -{ - uint32_t reg = 0; - - if (adev->asic_type == CHIP_TONGA || - adev->asic_type == CHIP_FIJI) { - reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER); - /* bit0: 0 means pf and 1 means vf */ - if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, FUNC_IDENTIFIER)) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; - /* bit31: 0 means disable IOV and 1 means enable */ - if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, IOV_ENABLE)) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; - } - - if (reg == 0) { - if (is_virtual_machine()) /* passthrough mode exclus sr-iov mode */ - adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; - } -} - static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] = { {mmGRBM_STATUS}, {mmGRBM_STATUS2}, @@ -1730,9 +1709,6 @@ static const struct amdgpu_ip_block_version vi_common_ip_block = int vi_set_ip_blocks(struct amdgpu_device *adev) { - /* in early init stage, vbios code won't work */ - vi_detect_hw_virtualization(adev); - if (amdgpu_sriov_vf(adev)) adev->virt.ops = &xgpu_vi_virt_ops; diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_1_offset.h index 68d0ffad28c7..92fd27c26a77 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_1_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_1_offset.h @@ -1162,8 +1162,10 @@ #define mmRCC_CONFIG_MEMSIZE_BASE_IDX 0 #define mmRCC_CONFIG_RESERVED 0x0de4 // duplicate #define mmRCC_CONFIG_RESERVED_BASE_IDX 0 +#ifndef mmRCC_IOV_FUNC_IDENTIFIER #define mmRCC_IOV_FUNC_IDENTIFIER 0x0de5 // duplicate #define mmRCC_IOV_FUNC_IDENTIFIER_BASE_IDX 0 +#endif // addressBlock: syshub_mmreg_ind_syshubdec diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_offset.h index 435462294fbc..a7cd760ebf8f 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_offset.h @@ -4251,8 +4251,10 @@ #define mmRCC_CONFIG_MEMSIZE_BASE_IDX 2 #define mmRCC_CONFIG_RESERVED 0x00c4 #define mmRCC_CONFIG_RESERVED_BASE_IDX 2 +#ifndef mmRCC_IOV_FUNC_IDENTIFIER #define mmRCC_IOV_FUNC_IDENTIFIER 0x00c5 #define mmRCC_IOV_FUNC_IDENTIFIER_BASE_IDX 2 +#endif // addressBlock: nbio_nbif0_rcc_dev0_BIFDEC1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h index ce5830ebe095..0c5a08bc034a 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h @@ -2687,8 +2687,10 @@ #define mmRCC_CONFIG_MEMSIZE_BASE_IDX 2 #define mmRCC_CONFIG_RESERVED 0x00c4 #define mmRCC_CONFIG_RESERVED_BASE_IDX 2 +#ifndef mmRCC_IOV_FUNC_IDENTIFIER #define mmRCC_IOV_FUNC_IDENTIFIER 0x00c5 #define mmRCC_IOV_FUNC_IDENTIFIER_BASE_IDX 2 +#endif // addressBlock: nbio_nbif0_rcc_dev0_BIFDEC1 -- cgit v1.2.3 From 61380faa4b4cc577df8a7ff5db5859bac6b351f7 Mon Sep 17 00:00:00 2001 From: John Clements Date: Wed, 25 Mar 2020 16:01:14 +0800 Subject: drm/amdgpu: disable ras query and iject during gpu reset added flag to ras context to indicate if ras query functionality is ready Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 24 +++++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 4 ++++ 3 files changed, 28 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 449720086fbc..f88fe7fd78ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4168,6 +4168,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, need_full_reset = job_signaled = false; INIT_LIST_HEAD(&device_list); + amdgpu_ras_set_error_query_ready(adev, false); + dev_info(adev->dev, "GPU %s begin!\n", (in_ras_intr && !use_baco) ? "jobs stop":"reset"); @@ -4224,6 +4226,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* block all schedulers and reset given job's ring */ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { if (tmp_adev != adev) { + amdgpu_ras_set_error_query_ready(tmp_adev, false); amdgpu_device_lock_adev(tmp_adev, false); if (!amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_pre_reset(tmp_adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 3c32a94d2424..9e9e0f7747b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -80,6 +80,20 @@ atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0); static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, uint64_t addr); +void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready) +{ + if (adev) + amdgpu_ras_get_context(adev)->error_query_ready = ready; +} + +bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev) +{ + if (adev) + return amdgpu_ras_get_context(adev)->error_query_ready; + + return false; +} + static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -281,7 +295,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * struct ras_debug_if data; int ret = 0; - if (amdgpu_ras_intr_triggered()) { + if (!amdgpu_ras_get_error_query_ready(adev)) { DRM_WARN("RAS WARN: error injection currently inaccessible\n"); return size; } @@ -399,7 +413,7 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, .head = obj->head, }; - if (amdgpu_ras_intr_triggered()) + if (!amdgpu_ras_get_error_query_ready(obj->adev)) return snprintf(buf, PAGE_SIZE, "Query currently inaccessible\n"); @@ -1886,8 +1900,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, } /* in resume phase, no need to create ras fs node */ - if (adev->in_suspend || adev->in_gpu_reset) + if (adev->in_suspend || adev->in_gpu_reset) { + amdgpu_ras_set_error_query_ready(adev, true); return 0; + } if (ih_info->cb) { r = amdgpu_ras_interrupt_add_handler(adev, ih_info); @@ -1899,6 +1915,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, if (r) goto sysfs; + amdgpu_ras_set_error_query_ready(adev, true); + return 0; cleanup: amdgpu_ras_sysfs_remove(adev, ras_block); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 55c3eceb390d..e7df5d8429f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -334,6 +334,8 @@ struct amdgpu_ras { uint32_t flags; bool reboot; struct amdgpu_ras_eeprom_control eeprom_control; + + bool error_query_ready; }; struct ras_fs_data { @@ -629,4 +631,6 @@ static inline void amdgpu_ras_intr_cleared(void) void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); +void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready); + #endif -- cgit v1.2.3 From 122078de168b8380e9dde15a5c04a5412e710cb6 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 4 Mar 2020 23:51:51 +0800 Subject: drm/amdgpu: equip new req_init_data handshake by this new handshake host side can prepare vbios/ip-discovery and pf&vf exchange data upon recieving this request without stopping world switch. this way the world switch is less impacted by VF's exclusive mode request Signed-off-by: Monk Liu Reviewed-by: Emily Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/nv.c | 15 +++++++++------ 2 files changed, 28 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f88fe7fd78ca..8d39ed47d65a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1798,6 +1798,21 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) amdgpu_amdkfd_device_probe(adev); if (amdgpu_sriov_vf(adev)) { + /* handle vbios stuff prior full access mode for new handshake */ + if (adev->virt.req_init_data_ver == 1) { + if (!amdgpu_get_bios(adev)) { + DRM_ERROR("failed to get vbios\n"); + return -EINVAL; + } + + r = amdgpu_atombios_init(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atombios_init failed\n"); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); + return r; + } + } + r = amdgpu_virt_request_full_gpu(adev, true); if (r) return -EAGAIN; @@ -1830,6 +1845,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) } /* get the vbios after the asic_funcs are set up */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { + /* skip vbios handling for new handshake */ + if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1) + continue; + /* Read BIOS */ if (!amdgpu_get_bios(adev)) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index a67d78d7eeeb..7768880fcccf 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -457,16 +457,19 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) { int r; - /* Set IP register base before any HW register access */ - r = nv_reg_base_init(adev); - if (r) - return r; - adev->nbio.funcs = &nbio_v2_3_funcs; adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { adev->virt.ops = &xgpu_nv_virt_ops; + /* try send GPU_INIT_DATA request to host */ + amdgpu_virt_request_init_data(adev); + } + + /* Set IP register base before any HW register access */ + r = nv_reg_base_init(adev); + if (r) + return r; switch (adev->asic_type) { case CHIP_NAVI10: -- cgit v1.2.3 From dffa11b4f74b1572341a667ec7a006420dc48626 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 4 Mar 2020 21:33:27 +0800 Subject: drm/amdgpu: adjust sequence of ip_discovery init and timeout_setting what: 1)move timtout setting before ip_early_init to reduce exclusive mode cost for SRIOV 2)move ip_discovery_init() to inside of amdgpu_discovery_reg_base_init() it is a prepare for the later upcoming patches. why: in later upcoming patches we would use a new mailbox event -- "req_gpu_init_data", which is a callback hooked in adev->virt.ops and this callback send a new event "REQ_GPU_INIT_DAT" to host to notify host to do some preparation like "IP discovery/vbios on the VF FB" and this callback must be: A) invoked after set_ip_block() because virt.ops is configured during set_ip_block() B) invoked before ip_discovery_init() becausen ip_discovery_init() need host side prepares everything in VF FB first. current place of ip_discovery_init() is before we can invoke callback of adev->virt.ops, thus we must move ip_discovery_init() to a place after the adev->virt.ops all settle done, and the perfect place is in amdgpu_discovery_reg_base_init() Signed-off-by: Monk Liu Reviewed-by: Emily Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 10 ++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h | 1 - 3 files changed, 10 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8d39ed47d65a..3c19ae0b13b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3077,12 +3077,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* detect hw virtualization here */ amdgpu_detect_virtualization(adev); - if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) { - r = amdgpu_discovery_init(adev); - if (r) { - dev_err(adev->dev, "amdgpu_discovery_init failed\n"); - return r; - } + r = amdgpu_device_get_job_timeout_settings(adev); + if (r) { + dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); + return r; } /* early init functions */ @@ -3090,12 +3088,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; - r = amdgpu_device_get_job_timeout_settings(adev); - if (r) { - dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - return r; - } - /* doorbell bar mapping and doorbell index init*/ amdgpu_device_doorbell_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 37e1fcf970b8..43bb22ad8add 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -156,7 +156,7 @@ static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size return !!(amdgpu_discovery_calculate_checksum(data, size) == expected); } -int amdgpu_discovery_init(struct amdgpu_device *adev) +static int amdgpu_discovery_init(struct amdgpu_device *adev) { struct table_info *info; struct binary_header *bhdr; @@ -255,10 +255,12 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) uint8_t num_base_address; int hw_ip; int i, j, k; + int r; - if (!adev->discovery) { - DRM_ERROR("ip discovery uninitialized\n"); - return -EINVAL; + r = amdgpu_discovery_init(adev); + if (r) { + DRM_ERROR("amdgpu_discovery_init failed\n"); + return r; } bhdr = (struct binary_header *)adev->discovery; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index ba78e15d9b05..d50d597c45ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -26,7 +26,6 @@ #define DISCOVERY_TMR_SIZE (64 << 10) -int amdgpu_discovery_init(struct amdgpu_device *adev); void amdgpu_discovery_fini(struct amdgpu_device *adev); int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev); int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, -- cgit v1.2.3 From 2f2941324c65bf23695038968cecab4e5cde647e Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Tue, 10 Mar 2020 18:12:13 +0800 Subject: drm/amdgpu: postpone entering fullaccess mode if host support new handshake we only need to enter fullaccess_mode in ip_init() part, otherwise we need to do it before reading vbios (becuase host prepares vbios for VF only after received REQ_GPU_INIT event under legacy handshake) Signed-off-by: Monk Liu Reviewed-by: Emily Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3c19ae0b13b9..a97492f3bc42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1812,10 +1812,14 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) return r; } } + } + /* we need to send REQ_GPU here for legacy handshaker otherwise the vbios + * will not be prepared by host for this VF */ + if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) { r = amdgpu_virt_request_full_gpu(adev, true); if (r) - return -EAGAIN; + return r; } adev->pm.pp_feature = amdgpu_pp_feature_mask; @@ -1975,6 +1979,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) return r; + if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) { + r = amdgpu_virt_request_full_gpu(adev, true); + if (r) + return -EAGAIN; + } + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; -- cgit v1.2.3 From b7b2a316b95e09ad51db0fd18c5f291051b06117 Mon Sep 17 00:00:00 2001 From: Jiawei Date: Thu, 26 Mar 2020 15:10:51 +0800 Subject: drm/amdgpu: extend compute job timeout extend compute lockup timeout to 60000 for SR-IOV. Reviewed-by: Emily Deng Signed-off-by: Jiawei Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a97492f3bc42..90601966c524 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2867,12 +2867,12 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) * By default timeout for non compute jobs is 10000. * And there is no timeout enforced on compute jobs. * In SR-IOV or passthrough mode, timeout for compute - * jobs are 10000 by default. + * jobs are 60000 by default. */ adev->gfx_timeout = msecs_to_jiffies(10000); adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) - adev->compute_timeout = adev->gfx_timeout; + adev->compute_timeout = msecs_to_jiffies(60000); else adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; -- cgit v1.2.3 From 04bef61e5da18c2b301c629a209ccdba4d4c6fbb Mon Sep 17 00:00:00 2001 From: Jack Zhang Date: Thu, 2 Apr 2020 15:10:24 +0800 Subject: drm/amdgpu/sriov add amdgpu_amdkfd_pre_reset in gpu reset kfd_pre_reset will free mem_objs allocated by kfd_gtt_sa_allocate Without this change, sriov tdr code path will never free those allocated memories and get memory leak. v2:add a bugfix for kiq ring test fail Signed-off-by: Jack Zhang Reviewed-by: Monk Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 3 files changed, 8 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 4ec6d0c03201..bdc1f5a532f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -543,6 +543,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t temp; struct v10_compute_mqd *m = get_mqd(mqd); + if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) + return 0; + #if 0 unsigned long flags; int retry; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..c2562d65e0b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -541,6 +541,9 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t temp; struct v9_mqd *m = get_mqd(mqd); + if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) + return 0; + if (adev->in_gpu_reset) return -EIO; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 90601966c524..626b46fa1a63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3852,6 +3852,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; + amdgpu_amdkfd_pre_reset(adev); + /* Resume IP prior to SMC */ r = amdgpu_device_ip_reinit_early_sriov(adev); if (r) -- cgit v1.2.3 From 1c6d567bdf73a207f51ef2e5745854ba7daa22c7 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 1 Apr 2020 11:46:57 +0200 Subject: drm/amdgpu: rework sched_list generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generate HW IP's sched_list in amdgpu_ring_init() instead of amdgpu_ctx.c. This makes amdgpu_ctx_init_compute_sched(), ring.has_high_prio and amdgpu_ctx_init_sched() unnecessary. This patch also stores sched_list for all HW IPs in one big array in struct amdgpu_device which makes amdgpu_ctx_init_entity() much more leaner. v2: fix a coding style issue do not use drm hw_ip const to populate amdgpu_ring_type enum v3: remove ctx reference and move sched array and num_sched to a struct use num_scheds to detect uninitialized scheduler list v4: use array_index_nospec for user space controlled variables fix possible checkpatch.pl warnings Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 160 ++++++----------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 3 - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 5 - drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 14 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 14 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 4 - drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 3 +- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 14 +-- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 7 +- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 7 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 11 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 13 +-- drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 3 +- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 3 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/si_dma.c | 3 +- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 3 +- drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 7 +- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 7 +- 35 files changed, 144 insertions(+), 197 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 7b3058fb5662..ccc581d246f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -853,6 +853,7 @@ struct amdgpu_device { struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; bool ib_pool_ready; struct amdgpu_sa_manager ring_tmp_bo[AMDGPU_IB_POOL_MAX]; + struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX]; /* interrupts */ struct amdgpu_irq irq; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 6ed36a2c5f73..8842c55d4490 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "amdgpu_sched.h" #include "amdgpu_ras.h" +#include #define to_amdgpu_ctx_entity(e) \ container_of((e), struct amdgpu_ctx_entity, entity) @@ -72,13 +73,30 @@ static enum gfx_pipe_priority amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sch } } -static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring) +static unsigned int amdgpu_ctx_prio_sched_to_hw(struct amdgpu_device *adev, + enum drm_sched_priority prio, + u32 hw_ip) +{ + unsigned int hw_prio; + + hw_prio = (hw_ip == AMDGPU_HW_IP_COMPUTE) ? + amdgpu_ctx_sched_prio_to_compute_prio(prio) : + AMDGPU_RING_PRIO_DEFAULT; + hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM); + if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0) + hw_prio = AMDGPU_RING_PRIO_DEFAULT; + + return hw_prio; +} + +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, + const u32 ring) { struct amdgpu_device *adev = ctx->adev; struct amdgpu_ctx_entity *entity; struct drm_gpu_scheduler **scheds = NULL, *sched = NULL; unsigned num_scheds = 0; - enum gfx_pipe_priority hw_prio; + unsigned int hw_prio; enum drm_sched_priority priority; int r; @@ -90,52 +108,16 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const entity->sequence = 1; priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ? ctx->init_priority : ctx->override_priority; - switch (hw_ip) { - case AMDGPU_HW_IP_GFX: - sched = &adev->gfx.gfx_ring[0].sched; - scheds = &sched; - num_scheds = 1; - break; - case AMDGPU_HW_IP_COMPUTE: - hw_prio = amdgpu_ctx_sched_prio_to_compute_prio(priority); - scheds = adev->gfx.compute_prio_sched[hw_prio]; - num_scheds = adev->gfx.num_compute_sched[hw_prio]; - break; - case AMDGPU_HW_IP_DMA: - scheds = adev->sdma.sdma_sched; - num_scheds = adev->sdma.num_sdma_sched; - break; - case AMDGPU_HW_IP_UVD: - sched = &adev->uvd.inst[0].ring.sched; - scheds = &sched; - num_scheds = 1; - break; - case AMDGPU_HW_IP_VCE: - sched = &adev->vce.ring[0].sched; - scheds = &sched; - num_scheds = 1; - break; - case AMDGPU_HW_IP_UVD_ENC: - sched = &adev->uvd.inst[0].ring_enc[0].sched; - scheds = &sched; - num_scheds = 1; - break; - case AMDGPU_HW_IP_VCN_DEC: - sched = drm_sched_pick_best(adev->vcn.vcn_dec_sched, - adev->vcn.num_vcn_dec_sched); - scheds = &sched; - num_scheds = 1; - break; - case AMDGPU_HW_IP_VCN_ENC: - sched = drm_sched_pick_best(adev->vcn.vcn_enc_sched, - adev->vcn.num_vcn_enc_sched); + hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority, hw_ip); + + hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM); + scheds = adev->gpu_sched[hw_ip][hw_prio].sched; + num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds; + + if (hw_ip == AMDGPU_HW_IP_VCN_ENC || hw_ip == AMDGPU_HW_IP_VCN_DEC) { + sched = drm_sched_pick_best(scheds, num_scheds); scheds = &sched; num_scheds = 1; - break; - case AMDGPU_HW_IP_VCN_JPEG: - scheds = adev->jpeg.jpeg_sched; - num_scheds = adev->jpeg.num_jpeg_sched; - break; } r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds, @@ -178,7 +160,6 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, ctx->override_priority = DRM_SCHED_PRIORITY_UNSET; return 0; - } static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) @@ -525,7 +506,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx, enum drm_sched_priority priority) { struct amdgpu_device *adev = ctx->adev; - enum gfx_pipe_priority hw_prio; + unsigned int hw_prio; struct drm_gpu_scheduler **scheds = NULL; unsigned num_scheds; @@ -534,9 +515,11 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx, /* set hw priority */ if (hw_ip == AMDGPU_HW_IP_COMPUTE) { - hw_prio = amdgpu_ctx_sched_prio_to_compute_prio(priority); - scheds = adev->gfx.compute_prio_sched[hw_prio]; - num_scheds = adev->gfx.num_compute_sched[hw_prio]; + hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority, + AMDGPU_HW_IP_COMPUTE); + hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX); + scheds = adev->gpu_sched[hw_ip][hw_prio].sched; + num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds; drm_sched_entity_modify_sched(&aentity->entity, scheds, num_scheds); } @@ -665,78 +648,3 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) idr_destroy(&mgr->ctx_handles); mutex_destroy(&mgr->lock); } - - -static void amdgpu_ctx_init_compute_sched(struct amdgpu_device *adev) -{ - int num_compute_sched_normal = 0; - int num_compute_sched_high = AMDGPU_MAX_COMPUTE_RINGS - 1; - int i; - - /* use one drm sched array, gfx.compute_sched to store both high and - * normal priority drm compute schedulers */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - if (!adev->gfx.compute_ring[i].has_high_prio) - adev->gfx.compute_sched[num_compute_sched_normal++] = - &adev->gfx.compute_ring[i].sched; - else - adev->gfx.compute_sched[num_compute_sched_high--] = - &adev->gfx.compute_ring[i].sched; - } - - /* compute ring only has two priority for now */ - i = AMDGPU_GFX_PIPE_PRIO_NORMAL; - adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0]; - adev->gfx.num_compute_sched[i] = num_compute_sched_normal; - - i = AMDGPU_GFX_PIPE_PRIO_HIGH; - if (num_compute_sched_high == (AMDGPU_MAX_COMPUTE_RINGS - 1)) { - /* When compute has no high priority rings then use */ - /* normal priority sched array */ - adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0]; - adev->gfx.num_compute_sched[i] = num_compute_sched_normal; - } else { - adev->gfx.compute_prio_sched[i] = - &adev->gfx.compute_sched[num_compute_sched_high - 1]; - adev->gfx.num_compute_sched[i] = - adev->gfx.num_compute_rings - num_compute_sched_normal; - } -} - -void amdgpu_ctx_init_sched(struct amdgpu_device *adev) -{ - int i, j; - - amdgpu_ctx_init_compute_sched(adev); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - adev->gfx.gfx_sched[i] = &adev->gfx.gfx_ring[i].sched; - adev->gfx.num_gfx_sched++; - } - - for (i = 0; i < adev->sdma.num_instances; i++) { - adev->sdma.sdma_sched[i] = &adev->sdma.instance[i].ring.sched; - adev->sdma.num_sdma_sched++; - } - - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] = - &adev->vcn.inst[i].ring_dec.sched; - } - - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - for (j = 0; j < adev->vcn.num_enc_rings; ++j) - adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] = - &adev->vcn.inst[i].ring_enc[j].sched; - } - - for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - if (adev->jpeg.harvest_config & (1 << i)) - continue; - adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] = - &adev->jpeg.inst[i].ring_dec.sched; - } -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index de490f183af2..f54e10314661 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -88,7 +88,4 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); -void amdgpu_ctx_init_sched(struct amdgpu_device *adev); - - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 626b46fa1a63..f7c51fe1bc35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3210,8 +3210,6 @@ fence_driver_init: adev->gfx.config.max_cu_per_sh, adev->gfx.cu_info.number); - amdgpu_ctx_init_sched(adev); - adev->accel_working = true; amdgpu_vm_check_compute_bug(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 6b9c9193cdfa..92f2e59056c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -320,7 +320,8 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->eop_gpu_addr = kiq->eop_gpu_addr; sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue); r = amdgpu_ring_init(adev, ring, 1024, - irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); + irq, AMDGPU_CP_KIQ_IRQ_DRIVER0, + AMDGPU_RING_PRIO_DEFAULT); if (r) dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 5825692d07e4..634746829024 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -286,13 +286,8 @@ struct amdgpu_gfx { bool me_fw_write_wait; bool cp_fw_write_wait; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; - struct drm_gpu_scheduler *gfx_sched[AMDGPU_MAX_GFX_RINGS]; - uint32_t num_gfx_sched; unsigned num_gfx_rings; struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; - struct drm_gpu_scheduler **compute_prio_sched[AMDGPU_GFX_PIPE_PRIO_MAX]; - struct drm_gpu_scheduler *compute_sched[AMDGPU_MAX_COMPUTE_RINGS]; - uint32_t num_compute_sched[AMDGPU_GFX_PIPE_PRIO_MAX]; unsigned num_compute_rings; struct amdgpu_irq_src eop_irq; struct amdgpu_irq_src priv_reg_irq; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index bd9ef9cc86de..5131a0a1bc8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -43,8 +43,6 @@ struct amdgpu_jpeg { uint8_t num_jpeg_inst; struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES]; struct amdgpu_jpeg_reg internal; - struct drm_gpu_scheduler *jpeg_sched[AMDGPU_MAX_JPEG_INSTANCES]; - uint32_t num_jpeg_sched; unsigned harvest_config; struct delayed_work idle_work; enum amd_powergating_state cur_state; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index a7e1d0425ed0..5f36bd58202a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -162,11 +162,13 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) * Returns 0 on success, error on failure. */ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, - unsigned max_dw, struct amdgpu_irq_src *irq_src, - unsigned irq_type) + unsigned int max_dw, struct amdgpu_irq_src *irq_src, + unsigned int irq_type, unsigned int hw_prio) { int r, i; int sched_hw_submission = amdgpu_sched_hw_submission; + u32 *num_sched; + u32 hw_ip; /* Set the hw submission limit higher for KIQ because * it's used for a number of gfx/compute tasks by both @@ -258,6 +260,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->priority = DRM_SCHED_PRIORITY_NORMAL; mutex_init(&ring->priority_mutex); + if (ring->funcs->type >= AMDGPU_RING_TYPE_GFX && + ring->funcs->type <= AMDGPU_RING_TYPE_VCN_JPEG) { + hw_ip = ring->funcs->type; + num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds; + adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] = + &ring->sched; + } + for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i) atomic_set(&ring->num_jobs[i], 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 4bae851e8d43..057e169b953f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -30,11 +30,15 @@ /* max number of rings */ #define AMDGPU_MAX_RINGS 28 +#define AMDGPU_MAX_HWIP_RINGS 8 #define AMDGPU_MAX_GFX_RINGS 2 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 #define AMDGPU_MAX_UVD_ENC_RINGS 2 +#define AMDGPU_RING_PRIO_DEFAULT 1 +#define AMDGPU_RING_PRIO_MAX AMDGPU_GFX_PIPE_PRIO_MAX + /* some special values for the owner field */ #define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul) #define AMDGPU_FENCE_OWNER_VM ((void *)1ul) @@ -65,6 +69,11 @@ struct amdgpu_ib; struct amdgpu_cs_parser; struct amdgpu_job; +struct amdgpu_sched { + u32 num_scheds; + struct drm_gpu_scheduler *sched[AMDGPU_MAX_HWIP_RINGS]; +}; + /* * Fences. */ @@ -219,7 +228,6 @@ struct amdgpu_ring { struct mutex priority_mutex; /* protected by priority_mutex */ int priority; - bool has_high_prio; #if defined(CONFIG_DEBUG_FS) struct dentry *ent; @@ -257,8 +265,8 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, - unsigned ring_size, struct amdgpu_irq_src *irq_src, - unsigned irq_type); + unsigned int ring_size, struct amdgpu_irq_src *irq_src, + unsigned int irq_type, unsigned int prio); void amdgpu_ring_fini(struct amdgpu_ring *ring); void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, uint32_t reg0, uint32_t val0, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 4b352206354b..2a152516504a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -61,8 +61,6 @@ struct amdgpu_sdma_ras_funcs { struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; - struct drm_gpu_scheduler *sdma_sched[AMDGPU_MAX_SDMA_INSTANCES]; - uint32_t num_sdma_sched; struct amdgpu_irq_src trap_irq; struct amdgpu_irq_src illegal_inst_irq; struct amdgpu_irq_src ecc_irq; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index f739e1ab4cfc..6da6e2e0fdd8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -207,10 +207,6 @@ struct amdgpu_vcn { uint8_t num_vcn_inst; struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; struct amdgpu_vcn_reg internal; - struct drm_gpu_scheduler *vcn_enc_sched[AMDGPU_MAX_VCN_ENC_RINGS]; - struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; - uint32_t num_vcn_enc_sched; - uint32_t num_vcn_dec_sched; struct mutex vcn_pg_lock; atomic_t total_submission_cnt; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index bade998f2cd2..7c2ee84fa4a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -979,7 +979,8 @@ static int cik_sdma_sw_init(void *handle) &adev->sdma.trap_irq, (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 : - AMDGPU_SDMA_IRQ_INSTANCE1); + AMDGPU_SDMA_IRQ_INSTANCE1, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 1de01a3d4c1d..9d4a4508b68e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -1299,7 +1299,8 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); + &adev->gfx.eop_irq, irq_type, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; return 0; @@ -1310,7 +1311,8 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, { int r; unsigned irq_type; - struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + struct amdgpu_ring *ring; + unsigned int hw_prio; ring = &adev->gfx.compute_ring[ring_id]; @@ -1329,10 +1331,11 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; - + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? + AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); + &adev->gfx.eop_irq, irq_type, hw_prio); if (r) return r; @@ -3261,11 +3264,8 @@ static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; - ring->has_high_prio = true; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; - } else { - ring->has_high_prio = false; } } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index dfeebcbaaf24..aa1e1be852dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3110,7 +3110,9 @@ static int gfx_v6_0_sw_init(void *handle) ring->ring_obj = NULL; sprintf(ring->name, "gfx"); r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); + &adev->gfx.eop_irq, + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } @@ -3132,7 +3134,8 @@ static int gfx_v6_0_sw_init(void *handle) sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); + &adev->gfx.eop_irq, irq_type, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 3b24fa17ca38..e5a88cad44cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4433,7 +4433,8 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); + &adev->gfx.eop_irq, irq_type, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; @@ -4505,7 +4506,9 @@ static int gfx_v7_0_sw_init(void *handle) ring->ring_obj = NULL; sprintf(ring->name, "gfx"); r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); + &adev->gfx.eop_irq, + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index bd452101428a..f3554b5a091d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1894,6 +1894,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, int r; unsigned irq_type; struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + unsigned int hw_prio; ring = &adev->gfx.compute_ring[ring_id]; @@ -1913,9 +1914,11 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? + AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); + &adev->gfx.eop_irq, irq_type, hw_prio); if (r) return r; @@ -2019,7 +2022,8 @@ static int gfx_v8_0_sw_init(void *handle) } r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, - AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } @@ -4432,11 +4436,8 @@ static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *m if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; - ring->has_high_prio = true; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; - } else { - ring->has_high_prio = false; } } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ce92cbded922..342c7bb070bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2190,6 +2190,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, int r; unsigned irq_type; struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + unsigned int hw_prio; ring = &adev->gfx.compute_ring[ring_id]; @@ -2208,10 +2209,11 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; - + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? + AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); + &adev->gfx.eop_irq, irq_type, hw_prio); if (r) return r; @@ -2305,7 +2307,9 @@ static int gfx_v9_0_sw_init(void *handle) ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); + &adev->gfx.eop_irq, + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } @@ -3369,11 +3373,8 @@ static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *m if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; - ring->has_high_prio = true; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; - } else { - ring->has_high_prio = false; } } } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 0debfd9f428c..b10c95cad9a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -480,7 +480,8 @@ int jpeg_v1_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; sprintf(ring->name, "jpeg_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, + 0, AMDGPU_RING_PRIO_DEFAULT); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index ba6aeff122da..e67d09cb1b03 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -106,7 +106,8 @@ static int jpeg_v2_0_sw_init(void *handle) ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; sprintf(ring->name, "jpeg_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, + 0, AMDGPU_RING_PRIO_DEFAULT); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index c04c2078a7c1..37df3f2e587a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -118,7 +118,8 @@ static int jpeg_v2_5_sw_init(void *handle) ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i; sprintf(ring->name, "jpeg_dec_%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, + 0, AMDGPU_RING_PRIO_DEFAULT); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 22b4daa71f7b..9a7f194c730a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -873,7 +873,8 @@ static int sdma_v2_4_sw_init(void *handle) &adev->sdma.trap_irq, (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 : - AMDGPU_SDMA_IRQ_INSTANCE1); + AMDGPU_SDMA_IRQ_INSTANCE1, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 7cd27a85cb7b..e6b5b4f672a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1157,7 +1157,8 @@ static int sdma_v3_0_sw_init(void *handle) &adev->sdma.trap_irq, (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 : - AMDGPU_SDMA_IRQ_INSTANCE1); + AMDGPU_SDMA_IRQ_INSTANCE1, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index c11014bb494f..b1bfda23ea31 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1859,7 +1859,8 @@ static int sdma_v4_0_sw_init(void *handle) sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, - AMDGPU_SDMA_IRQ_INSTANCE0 + i); + AMDGPU_SDMA_IRQ_INSTANCE0 + i, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; @@ -1877,7 +1878,8 @@ static int sdma_v4_0_sw_init(void *handle) sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, - AMDGPU_SDMA_IRQ_INSTANCE0 + i); + AMDGPU_SDMA_IRQ_INSTANCE0 + i, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 3b631d003db1..a1c44450fe4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1272,7 +1272,8 @@ static int sdma_v5_0_sw_init(void *handle) &adev->sdma.trap_irq, (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 : - AMDGPU_SDMA_IRQ_INSTANCE1); + AMDGPU_SDMA_IRQ_INSTANCE1, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 5103dc4c8029..e4b8283ad11d 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -504,7 +504,8 @@ static int si_dma_sw_init(void *handle) &adev->sdma.trap_irq, (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 : - AMDGPU_SDMA_IRQ_INSTANCE1); + AMDGPU_SDMA_IRQ_INSTANCE1, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 957e14e2c155..3cafba726587 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -118,7 +118,8 @@ static int uvd_v4_2_sw_init(void *handle) ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 2aad6689823b..a566ff926e90 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -116,7 +116,8 @@ static int uvd_v5_0_sw_init(void *handle) ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 81186be66600..0a880bc101b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -418,7 +418,8 @@ static int uvd_v6_0_sw_init(void *handle) ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; @@ -430,7 +431,9 @@ static int uvd_v6_0_sw_init(void *handle) for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst->ring_enc[i]; sprintf(ring->name, "uvd_enc%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); + r = amdgpu_ring_init(adev, ring, 512, + &adev->uvd.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index eef56211b3a2..7a55457e6f9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -452,7 +452,9 @@ static int uvd_v7_0_sw_init(void *handle) if (!amdgpu_sriov_vf(adev)) { ring = &adev->uvd.inst[j].ring; sprintf(ring->name, "uvd_%d", ring->me); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); + r = amdgpu_ring_init(adev, ring, 512, + &adev->uvd.inst[j].irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } @@ -471,7 +473,9 @@ static int uvd_v7_0_sw_init(void *handle) else ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring2_3 * 2 + 1; } - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); + r = amdgpu_ring_init(adev, ring, 512, + &adev->uvd.inst[j].irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index b6837fcfdba7..0e2945baf0f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -434,7 +434,8 @@ static int vce_v2_0_sw_init(void *handle) ring = &adev->vce.ring[i]; sprintf(ring->name, "vce%d", i); r = amdgpu_ring_init(adev, ring, 512, - &adev->vce.irq, 0); + &adev->vce.irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 217db187207c..6d9108fa22e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -442,7 +442,8 @@ static int vce_v3_0_sw_init(void *handle) for (i = 0; i < adev->vce.num_rings; i++) { ring = &adev->vce.ring[i]; sprintf(ring->name, "vce%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 5e986dea4645..a0fb119240f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -476,7 +476,8 @@ static int vce_v4_0_sw_init(void *handle) else ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1; } - r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index f570ac72a351..1ad79155ed00 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -127,7 +127,8 @@ static int vcn_v1_0_sw_init(void *handle) ring = &adev->vcn.inst->ring_dec; sprintf(ring->name, "vcn_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; @@ -145,7 +146,8 @@ static int vcn_v1_0_sw_init(void *handle) for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ring = &adev->vcn.inst->ring_enc[i]; sprintf(ring->name, "vcn_enc%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 11a75f816ba0..349da7bf7c68 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -134,7 +134,8 @@ static int vcn_v2_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; sprintf(ring->name, "vcn_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; @@ -164,7 +165,8 @@ static int vcn_v2_0_sw_init(void *handle) else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + i; sprintf(ring->name, "vcn_enc%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 8470e09eca0b..0fa1c5cec439 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -193,7 +193,8 @@ static int vcn_v2_5_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + (amdgpu_sriov_vf(adev) ? 2*j : 8*j); sprintf(ring->name, "vcn_dec_%d", j); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, + 0, AMDGPU_RING_PRIO_DEFAULT); if (r) return r; @@ -205,7 +206,9 @@ static int vcn_v2_5_sw_init(void *handle) (amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j)); sprintf(ring->name, "vcn_enc_%d.%d", j, i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + r = amdgpu_ring_init(adev, ring, 512, + &adev->vcn.inst[j].irq, 0, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } -- cgit v1.2.3 From fe9824d15eff7ef38f63eddd1a06648e49286c7b Mon Sep 17 00:00:00 2001 From: Jack Zhang Date: Tue, 7 Apr 2020 13:44:51 +0800 Subject: drm/amdkfd Avoid destroy hqd when GPU is on reset This reverts commit 5161bba4311f in order to split it into two different patches, and this will make it easier to understand. [PATCH 1/2] porting to gfx10 from commit 1b0bfcff463f390c40 ("drm/amdgpu: Avoid destroy hqd when GPU is on reset") Originally, MEC is touched without GPU initialized first. Signed-off-by: Jack Zhang Reviewed-by: Monk Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 3 --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 -- 3 files changed, 2 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index bdc1f5a532f3..691c89705bcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -543,8 +543,8 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t temp; struct v10_compute_mqd *m = get_mqd(mqd); - if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) - return 0; + if (adev->in_gpu_reset) + return -EIO; #if 0 unsigned long flags; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index c2562d65e0b5..df841c2ac5e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -541,9 +541,6 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t temp; struct v9_mqd *m = get_mqd(mqd); - if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) - return 0; - if (adev->in_gpu_reset) return -EIO; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f7c51fe1bc35..c38777184433 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3850,8 +3850,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; - amdgpu_amdkfd_pre_reset(adev); - /* Resume IP prior to SMC */ r = amdgpu_device_ip_reinit_early_sriov(adev); if (r) -- cgit v1.2.3 From b639c22c98ff0ba140799ddc56f639240014f999 Mon Sep 17 00:00:00 2001 From: Jack Zhang Date: Tue, 7 Apr 2020 13:50:05 +0800 Subject: drm/amdgpu/sriov add amdgpu_amdkfd_pre_reset in gpu reset [PATCH 2/2] kfd_pre_reset will free mem_objs allocated by kfd_gtt_sa_allocate Without this change, sriov tdr code path will never free those allocated memories and get memory leak. Signed-off-by: Jack Zhang Reviewed-by: Monk Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c38777184433..f7c51fe1bc35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3850,6 +3850,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; + amdgpu_amdkfd_pre_reset(adev); + /* Resume IP prior to SMC */ r = amdgpu_device_ip_reinit_early_sriov(adev); if (r) -- cgit v1.2.3 From a23ca7f76d558e2275cea6033171a6c47fcd002c Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 7 Apr 2020 20:21:26 +0800 Subject: drm/amdgpu: fix gfx hang during suspend with video playback (v2) The system will be hang up during S3 suspend because of SMU is pending for GC not respose the register CP_HQD_ACTIVE access request.This issue root cause of accessing the GC register under enter GFX CGGPG and can be fixed by disable GFX CGPG before perform suspend. v2: Use disable the GFX CGPG instead of RLC safe mode guard. Signed-off-by: Prike Liang Tested-by: Mengbing Wang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f7c51fe1bc35..a191f6e48550 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2438,8 +2438,6 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) { int i, r; - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); - amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) @@ -3468,6 +3466,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) } } + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + amdgpu_amdkfd_suspend(adev, !fbcon); amdgpu_ras_suspend(adev); -- cgit v1.2.3 From dec0520aff8df2f1aca8ab9b29818a8002592bec Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 3 Apr 2020 16:40:35 +0800 Subject: drm/amdgpu: remove inproper workaround for vega10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the workaround is not needed for soc15 ASICs except for vega10. it is even not needed with latest vega10 vbios. Signed-off-by: Hawking Zhang Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ---------------- 2 files changed, 18 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ccc581d246f7..992778e3139c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -956,8 +956,6 @@ struct amdgpu_device { /* s3/s4 mask */ bool in_suspend; - /* record last mm index being written through WREG32*/ - unsigned long last_mm_index; bool in_gpu_reset; enum pp_mp1_state mp1_state; struct mutex lock_reset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a191f6e48550..eddfef191405 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -387,10 +387,6 @@ void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); } - - if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) { - udelay(500); - } } /** @@ -406,10 +402,6 @@ void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags) { - if (adev->asic_type >= CHIP_VEGA10 && reg == 0) { - adev->last_mm_index = v; - } - if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))) return amdgpu_kiq_wreg(adev, reg, v); @@ -464,20 +456,12 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg) */ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { - if (adev->asic_type >= CHIP_VEGA10 && reg == 0) { - adev->last_mm_index = v; - } - if ((reg * 4) < adev->rio_mem_size) iowrite32(v, adev->rio_mem + (reg * 4)); else { iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4)); iowrite32(v, adev->rio_mem + (mmMM_DATA * 4)); } - - if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) { - udelay(500); - } } /** -- cgit v1.2.3 From ec59847e741d7d2c9d404bb3d1efcf11b77293ff Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 3 Apr 2020 17:51:42 +0800 Subject: drm/amdgpu: retire RREG32_IDX/WREG32_IDX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit those are not needed anymore Signed-off-by: Hawking Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 992778e3139c..0abba6307f62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1023,8 +1023,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev); /* * Registers read & write functions. */ - -#define AMDGPU_REGS_IDX (1<<0) #define AMDGPU_REGS_NO_KIQ (1<<1) #define AMDGPU_REGS_KIQ (1<<2) @@ -1038,10 +1036,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) #define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0) -#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX) #define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0)) #define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0) -#define WREG32_IDX(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_IDX) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index eddfef191405..f3e144d226c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -317,7 +317,7 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))) return amdgpu_kiq_rreg(adev, reg); - if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) + if ((reg * 4) < adev->rmmio_size) ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); else { unsigned long flags; @@ -377,7 +377,7 @@ void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, { trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); - if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) + if ((reg * 4) < adev->rmmio_size) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); else { unsigned long flags; -- cgit v1.2.3 From f384ff95f652ad743d6e3a901c60ca521da78674 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 3 Apr 2020 17:58:06 +0800 Subject: drm/amdgpu: retire AMDGPU_REGS_KIQ flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit all the register access through kiq is redirected to amdgpu_kiq_rreg/amdgpu_kiq_wreg Signed-off-by: Hawking Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0abba6307f62..fc2e9d8c3ee8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1024,13 +1024,12 @@ int emu_soc_asic_init(struct amdgpu_device *adev); * Registers read & write functions. */ #define AMDGPU_REGS_NO_KIQ (1<<1) -#define AMDGPU_REGS_KIQ (1<<2) #define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) -#define RREG32_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_KIQ) -#define WREG32_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_KIQ) +#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) +#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f3e144d226c0..a431de1015a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -314,7 +314,7 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, { uint32_t ret; - if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))) + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) return amdgpu_kiq_rreg(adev, reg); if ((reg * 4) < adev->rmmio_size) @@ -402,7 +402,7 @@ void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags) { - if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))) + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) return amdgpu_kiq_wreg(adev, reg, v); amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags); -- cgit v1.2.3 From 2eee0229f65e897134566888e5321bcb3af0df7a Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 8 Apr 2020 16:18:52 +0800 Subject: drm/amdgpu: support access regs outside of mmio bar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit add indirect access support to registers outside of mmio bar. Signed-off-by: Hawking Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 18 ++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 +++++++++++------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 4 +-- 3 files changed, 28 insertions(+), 39 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index fc2e9d8c3ee8..164c545d2432 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1003,10 +1003,10 @@ int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, uint32_t *buf, size_t size, bool write); -uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, +uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t acc_flags); +void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); -void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, - uint32_t acc_flags); void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); @@ -1025,8 +1025,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); */ #define AMDGPU_REGS_NO_KIQ (1<<1) -#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) -#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) +#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) +#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) #define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) #define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) @@ -1034,9 +1034,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) -#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0) -#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0)) -#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0) +#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0) +#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_device_rreg(adev, (reg), 0)) +#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) @@ -1073,7 +1073,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); tmp_ |= ((val) & ~(mask)); \ WREG32_PLL(reg, tmp_); \ } while (0) -#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_mm_rreg((adev), (reg), false)) +#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_device_rreg((adev), (reg), false)) #define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg)) #define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a431de1015a3..f51a32fb3c03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -298,10 +298,10 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, } /* - * MMIO register access helper functions. + * device register access helper functions. */ /** - * amdgpu_mm_rreg - read a memory mapped IO register + * amdgpu_device_rreg - read a register * * @adev: amdgpu_device pointer * @reg: dword aligned register offset @@ -309,8 +309,8 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, * * Returns the 32 bit value from the offset specified. */ -uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, - uint32_t acc_flags) +uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t acc_flags) { uint32_t ret; @@ -319,15 +319,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, if ((reg * 4) < adev->rmmio_size) ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); - else { - unsigned long flags; - - spin_lock_irqsave(&adev->mmio_idx_lock, flags); - writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4)); - ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); - } - trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret); + else + ret = adev->pcie_rreg(adev, (reg * 4)); + trace_amdgpu_device_rreg(adev->pdev->device, reg, ret); return ret; } @@ -373,24 +367,19 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) BUG(); } -void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags) +void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg, + uint32_t v, uint32_t acc_flags) { - trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); + trace_amdgpu_device_wreg(adev->pdev->device, reg, v); if ((reg * 4) < adev->rmmio_size) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); - else { - unsigned long flags; - - spin_lock_irqsave(&adev->mmio_idx_lock, flags); - writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4)); - writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); - } + else + adev->pcie_wreg(adev, (reg * 4), v); } /** - * amdgpu_mm_wreg - write to a memory mapped IO register + * amdgpu_device_wreg - write to a register * * @adev: amdgpu_device pointer * @reg: dword aligned register offset @@ -399,13 +388,13 @@ void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, * * Writes the value specified to the offset specified. */ -void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, - uint32_t acc_flags) +void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, + uint32_t acc_flags) { if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) return amdgpu_kiq_wreg(adev, reg, v); - amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags); + amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags); } /* @@ -424,7 +413,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v); } - amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags); + amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 63e734a125fb..5da20fc166d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -35,7 +35,7 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished) -TRACE_EVENT(amdgpu_mm_rreg, +TRACE_EVENT(amdgpu_device_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), TP_STRUCT__entry( @@ -54,7 +54,7 @@ TRACE_EVENT(amdgpu_mm_rreg, (unsigned long)__entry->value) ); -TRACE_EVENT(amdgpu_mm_wreg, +TRACE_EVENT(amdgpu_device_wreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), TP_STRUCT__entry( -- cgit v1.2.3 From dadce777e0947b9b6839f06f360882e54ba2a154 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 10 Apr 2020 15:38:44 +0800 Subject: drm/amdgpu: fix wrong vram lost counter increment V2 Vram lost counter is wrongly increased by two during baco reset. V2: assumed vram lost for mode1 reset on all ASICs Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 ++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/cik.c | 2 -- drivers/gpu/drm/amd/amdgpu/nv.c | 4 ---- drivers/gpu/drm/amd/amdgpu/soc15.c | 4 ---- drivers/gpu/drm/amd/amdgpu/vi.c | 2 -- 5 files changed, 18 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f51a32fb3c03..9db9ab417dae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2079,8 +2079,24 @@ static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) */ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) { - return !!memcmp(adev->gart.ptr, adev->reset_magic, - AMDGPU_RESET_MAGIC_NUM); + if (memcmp(adev->gart.ptr, adev->reset_magic, + AMDGPU_RESET_MAGIC_NUM)) + return true; + + if (!adev->in_gpu_reset) + return false; + + /* + * For all ASICs with baco/mode1 reset, the VRAM is + * always assumed to be lost. + */ + switch (amdgpu_asic_reset_method(adev)) { + case AMD_RESET_METHOD_BACO: + case AMD_RESET_METHOD_MODE1: + return true; + default: + return false; + } } /** diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index db68ffa27984..fe306d0f73f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1358,8 +1358,6 @@ static int cik_asic_reset(struct amdgpu_device *adev) int r; if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { - if (!adev->in_suspend) - amdgpu_inc_vram_lost(adev); r = amdgpu_dpm_baco_reset(adev); } else { r = cik_asic_pci_config_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 7768880fcccf..995bdec9fa7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -351,8 +351,6 @@ static int nv_asic_reset(struct amdgpu_device *adev) struct smu_context *smu = &adev->smu; if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { - if (!adev->in_suspend) - amdgpu_inc_vram_lost(adev); ret = smu_baco_enter(smu); if (ret) return ret; @@ -360,8 +358,6 @@ static int nv_asic_reset(struct amdgpu_device *adev) if (ret) return ret; } else { - if (!adev->in_suspend) - amdgpu_inc_vram_lost(adev); ret = nv_asic_mode1_reset(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index a597ad22b675..58a440a15525 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -569,14 +569,10 @@ static int soc15_asic_reset(struct amdgpu_device *adev) switch (soc15_asic_reset_method(adev)) { case AMD_RESET_METHOD_BACO: - if (!adev->in_suspend) - amdgpu_inc_vram_lost(adev); return soc15_asic_baco_reset(adev); case AMD_RESET_METHOD_MODE2: return amdgpu_dpm_mode2_reset(adev); default: - if (!adev->in_suspend) - amdgpu_inc_vram_lost(adev); return soc15_asic_mode1_reset(adev); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 0a90c296409b..af8986a55354 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -744,8 +744,6 @@ static int vi_asic_reset(struct amdgpu_device *adev) int r; if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { - if (!adev->in_suspend) - amdgpu_inc_vram_lost(adev); r = amdgpu_dpm_baco_reset(adev); } else { r = vi_asic_pci_config_reset(adev); -- cgit v1.2.3 From dd4fa6c1b89a4f4034ec79c65405fdfc920204d1 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Wed, 8 Apr 2020 21:28:15 -0400 Subject: drm/amd/amdgpu: remove hardcoded module name in prints Let format prefixes take care of printing the module name through pr_fmt and dev_fmt definitions. Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 9dff792c9290..d7df11b07d11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -362,13 +362,13 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate, ¶m); if (ret) { - pr_err("amdgpu: failed to validate PT BOs\n"); + pr_err("failed to validate PT BOs\n"); return ret; } ret = amdgpu_amdkfd_validate(¶m, pd); if (ret) { - pr_err("amdgpu: failed to validate PD\n"); + pr_err("failed to validate PD\n"); return ret; } @@ -377,7 +377,7 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) if (vm->use_cpu_for_update) { ret = amdgpu_bo_kmap(pd, NULL); if (ret) { - pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret); + pr_err("failed to kmap PD, ret=%d\n", ret); return ret; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9db9ab417dae..9f1377c16090 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1187,7 +1187,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero return; if (state == VGA_SWITCHEROO_ON) { - pr_info("amdgpu: switched on\n"); + pr_info("switched on\n"); /* don't suspend or resume card normally */ dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; @@ -1201,7 +1201,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero dev->switch_power_state = DRM_SWITCH_POWER_ON; drm_kms_helper_poll_enable(dev); } else { - pr_info("amdgpu: switched off\n"); + pr_info("switched off\n"); drm_kms_helper_poll_disable(dev); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; amdgpu_device_suspend(dev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 5ed4227f304b..0cc4c67f95f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -260,7 +260,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags); if (nvec > 0) { adev->irq.msi_enabled = true; - dev_dbg(adev->dev, "amdgpu: using MSI/MSI-X.\n"); + dev_dbg(adev->dev, "using MSI/MSI-X.\n"); } } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index b205039350b6..c1a530dbe162 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -858,7 +858,7 @@ static int gmc_v6_0_sw_init(void *handle) r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n"); + dev_warn(adev->dev, "No suitable DMA available.\n"); return r; } adev->need_swiotlb = drm_need_swiotlb(44); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 9da9596a3638..e8529e244a2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1019,7 +1019,7 @@ static int gmc_v7_0_sw_init(void *handle) r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40)); if (r) { - pr_warn("amdgpu: No suitable DMA available\n"); + pr_warn("No suitable DMA available\n"); return r; } adev->need_swiotlb = drm_need_swiotlb(40); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 27d83204fa2b..0aa5b82808d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1144,7 +1144,7 @@ static int gmc_v8_0_sw_init(void *handle) r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40)); if (r) { - pr_warn("amdgpu: No suitable DMA available\n"); + pr_warn("No suitable DMA available\n"); return r; } adev->need_swiotlb = drm_need_swiotlb(40); -- cgit v1.2.3 From ced1ba9761693ec310c33edf47c63b062a09be4a Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Mon, 13 Apr 2020 21:41:14 +0800 Subject: drm/amdgpu: fix the hw hang during perform system reboot and reset The system reboot failed as some IP blocks enter power gate before perform hw resource destory. Meanwhile use unify interface to set device CGPG to ungate state can simplify the amdgpu poweroff or reset ungate guard. Fixes: 487eca11a321ef ("drm/amdgpu: fix gfx hang during suspend with video playback (v2)") Signed-off-by: Prike Liang Tested-by: Mengbing Wang Tested-by: Paul Menzel Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9f1377c16090..706f93d26136 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2427,6 +2427,8 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) { int i, r; + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) -- cgit v1.2.3 From fdd21e62b01b0c618a19237344dc96b843647811 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Mon, 13 Apr 2020 14:15:44 -0400 Subject: Revert "drm/amdgpu: use the BAR if possible in amdgpu_device_vram_access v2" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit c12b84d6e0d70f1185e6daddfd12afb671791b6e. The original patch causes a RAS event and subsequent kernel hard-hang when running the KFDMemoryTest.PtraceAccessInvisibleVram on VG20 and Arcturus dmesg output at hang time: [drm] RAS event of type ERREVENT_ATHUB_INTERRUPT detected! amdgpu 0000:67:00.0: GPU reset begin! Evicting PASID 0x8000 queues Started evicting pasid 0x8000 qcm fence wait loop timeout expired The cp might be in an unrecoverable state due to an unsuccessful queues preemption Failed to evict process queues Failed to suspend process 0x8000 Finished evicting pasid 0x8000 Started restoring pasid 0x8000 Finished restoring pasid 0x8000 [drm] UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT amdgpu: [powerplay] Failed to send message 0x26, response 0x0 amdgpu: [powerplay] Failed to set soft min gfxclk ! amdgpu: [powerplay] Failed to upload DPM Bootup Levels! amdgpu: [powerplay] Failed to send message 0x7, response 0x0 amdgpu: [powerplay] [DisableAllSMUFeatures] Failed to disable all smu features! amdgpu: [powerplay] [DisableDpmTasks] Failed to disable all smu features! amdgpu: [powerplay] [PowerOffAsic] Failed to disable DPM! [drm:amdgpu_device_ip_suspend_phase2 [amdgpu]] *ERROR* suspend of IP block failed -5 Signed-off-by: Kent Russell Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 706f93d26136..9da5fc805d00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -254,32 +254,6 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, uint32_t hi = ~0; uint64_t last; - -#ifdef CONFIG_64BIT - last = min(pos + size, adev->gmc.visible_vram_size); - if (last > pos) { - void __iomem *addr = adev->mman.aper_base_kaddr + pos; - size_t count = last - pos; - - if (write) { - memcpy_toio(addr, buf, count); - mb(); - amdgpu_asic_flush_hdp(adev, NULL); - } else { - amdgpu_asic_invalidate_hdp(adev, NULL); - mb(); - memcpy_fromio(buf, addr, count); - } - - if (count == size) - return; - - pos += count; - buf += count / 4; - size -= count; - } -#endif - spin_lock_irqsave(&adev->mmio_idx_lock, flags); for (last = pos + size; pos < last; pos += 4) { uint32_t tmp = pos >> 31; -- cgit v1.2.3 From d84a430d9f7b1ce6baedf1305106d0ae706aca76 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 17 Mar 2020 15:43:41 -0400 Subject: drm/amdgpu: fix race between pstate and remote buffer map Vega20 arbitrates pstate at hive level and not device level. Last peer to remote buffer unmap could drop P-State while another process is still remote buffer mapped. With this fix, P-States still needs to be disabled for now as SMU bug was discovered on synchronous P2P transfers. This should be fixed in the next FW update. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 ++----- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 -- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 68 ++++++++++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 9 +++- 6 files changed, 49 insertions(+), 53 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 215f57765e5e..8a7f794e3ffa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -986,8 +986,6 @@ struct amdgpu_device { uint64_t unique_id; uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; - /* device pstate */ - int pstate; /* enable runtime pm on the device */ bool runpm; bool in_runpm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9da5fc805d00..ad74fd88fa76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2248,7 +2248,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) if (gpu_instance->adev->flags & AMD_IS_APU) continue; - r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0); + r = amdgpu_xgmi_set_pstate(gpu_instance->adev, + AMDGPU_XGMI_PSTATE_MIN); if (r) { DRM_ERROR("pstate setting failed (%d).\n", r); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6d9252a27916..c6c3fc276fa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2124,11 +2124,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, if (bo && amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev)) && (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)) { bo_va->is_xgmi = true; - mutex_lock(&adev->vm_manager.lock_pstate); /* Power up XGMI if it can be potentially used */ - if (++adev->vm_manager.xgmi_map_counter == 1) - amdgpu_xgmi_set_pstate(adev, 1); - mutex_unlock(&adev->vm_manager.lock_pstate); + amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20); } return bo_va; @@ -2551,12 +2548,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, dma_fence_put(bo_va->last_pt_update); - if (bo && bo_va->is_xgmi) { - mutex_lock(&adev->vm_manager.lock_pstate); - if (--adev->vm_manager.xgmi_map_counter == 0) - amdgpu_xgmi_set_pstate(adev, 0); - mutex_unlock(&adev->vm_manager.lock_pstate); - } + if (bo && bo_va->is_xgmi) + amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MIN); kfree(bo_va); } @@ -3166,9 +3159,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) idr_init(&adev->vm_manager.pasid_idr); spin_lock_init(&adev->vm_manager.pasid_lock); - - adev->vm_manager.xgmi_map_counter = 0; - mutex_init(&adev->vm_manager.lock_pstate); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 06fe30e1492d..b13c14d6b820 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -349,10 +349,6 @@ struct amdgpu_vm_manager { */ struct idr pasid_idr; spinlock_t pasid_lock; - - /* counter of mapped memory through xgmi */ - uint32_t xgmi_map_counter; - struct mutex lock_pstate; }; #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 8c3215505e78..54d8a3e7e75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -373,7 +373,13 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo if (lock) mutex_lock(&tmp->hive_lock); - tmp->pstate = -1; + tmp->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN; + tmp->hi_req_gpu = NULL; + /* + * hive pstate on boot is high in vega20 so we have to go to low + * pstate on after boot. + */ + tmp->hi_req_count = AMDGPU_MAX_XGMI_DEVICE_PER_HIVE; mutex_unlock(&xgmi_mutex); return tmp; @@ -383,50 +389,51 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) { int ret = 0; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); - struct amdgpu_device *tmp_adev; - bool update_hive_pstate = true; - bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20; + struct amdgpu_device *request_adev = hive->hi_req_gpu ? + hive->hi_req_gpu : adev; + bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20; + bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN; - if (!hive) + /* fw bug so temporarily disable pstate switching */ + if (!hive || adev->asic_type == CHIP_VEGA20) return 0; mutex_lock(&hive->hive_lock); - if (hive->pstate == pstate) { - adev->pstate = is_high_pstate ? pstate : adev->pstate; + if (is_hi_req) + hive->hi_req_count++; + else + hive->hi_req_count--; + + /* + * Vega20 only needs single peer to request pstate high for the hive to + * go high but all peers must request pstate low for the hive to go low + */ + if (hive->pstate == pstate || + (!is_hi_req && hive->hi_req_count && !init_low)) goto out; - } - dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); + dev_dbg(request_adev->dev, "Set xgmi pstate %d.\n", pstate); - ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate); + ret = amdgpu_dpm_set_xgmi_pstate(request_adev, pstate); if (ret) { - dev_err(adev->dev, + dev_err(request_adev->dev, "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", - adev->gmc.xgmi.node_id, - adev->gmc.xgmi.hive_id, ret); + request_adev->gmc.xgmi.node_id, + request_adev->gmc.xgmi.hive_id, ret); goto out; } - /* Update device pstate */ - adev->pstate = pstate; - - /* - * Update the hive pstate only all devices of the hive - * are in the same pstate - */ - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - if (tmp_adev->pstate != adev->pstate) { - update_hive_pstate = false; - break; - } - } - if (update_hive_pstate || is_high_pstate) + if (init_low) + hive->pstate = hive->hi_req_count ? + hive->pstate : AMDGPU_XGMI_PSTATE_MIN; + else { hive->pstate = pstate; - + hive->hi_req_gpu = pstate != AMDGPU_XGMI_PSTATE_MIN ? + adev : NULL; + } out: mutex_unlock(&hive->hive_lock); - return ret; } @@ -507,9 +514,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit; } - /* Set default device pstate */ - adev->pstate = -1; - top_info = &adev->psp.xgmi_context.top_info; list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index d5a63904ec33..6999eab16a72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -25,6 +25,7 @@ #include #include "amdgpu_psp.h" + struct amdgpu_hive_info { uint64_t hive_id; struct list_head device_list; @@ -33,8 +34,14 @@ struct amdgpu_hive_info { struct kobject *kobj; struct device_attribute dev_attr; struct amdgpu_device *adev; - int pstate; /*0 -- low , 1 -- high , -1 unknown*/ + int hi_req_count; + struct amdgpu_device *hi_req_gpu; struct task_barrier tb; + enum { + AMDGPU_XGMI_PSTATE_MIN, + AMDGPU_XGMI_PSTATE_MAX_VEGA20, + AMDGPU_XGMI_PSTATE_UNKNOWN + } pstate; }; struct amdgpu_pcs_ras_field { -- cgit v1.2.3 From d69b8971e540ae908a4b7ba44aa11ecb42a2c406 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 17 Apr 2020 16:11:48 -0400 Subject: drm/amdgpu: Print CU information by default during initialization This is convenient for multiple teams to obtain the information. Also, add device info by using dev_info(). Signed-off-by: Yong Zhao Reviewed-by: Dennis Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ad74fd88fa76..889e68851504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3168,7 +3168,8 @@ fence_driver_init: goto failed; } - DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", + dev_info(adev->dev, + "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", adev->gfx.config.max_shader_engines, adev->gfx.config.max_sh_per_se, adev->gfx.config.max_cu_per_sh, -- cgit v1.2.3 From e05185b34157ba606bd2200bcc3c335cf19422ae Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Mon, 20 Apr 2020 23:08:14 +0800 Subject: drm/amdgpu: clean up unused variable about ring lru MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clean up unused variable: 1. ring_lru_list 2. ring_lru_list_lock related-commit: drm/amdgpu: remove ring lru handling Signed-off-by: Kevin Wang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 --- 2 files changed, 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8a7f794e3ffa..b90449cf23fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -957,9 +957,6 @@ struct amdgpu_device { /* link all shadow bo */ struct list_head shadow_list; struct mutex shadow_list_lock; - /* keep an lru list of rings by HW IP */ - struct list_head ring_lru_list; - spinlock_t ring_lru_list_lock; /* record hw reset is performed */ bool has_hw_reset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 889e68851504..c239035cdf14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2988,9 +2988,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->shadow_list); mutex_init(&adev->shadow_list_lock); - INIT_LIST_HEAD(&adev->ring_lru_list); - spin_lock_init(&adev->ring_lru_list_lock); - INIT_DELAYED_WORK(&adev->delayed_init_work, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, -- cgit v1.2.3 From a2f63ee8b5eabda8b317425d1e487e51a4d7b21e Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 16 Apr 2020 12:15:31 +0800 Subject: drm/amdgpu: correct fbdev suspend on gpu reset As for XGMI setup, it needs to be performed on all the devices from the same hive. Signed-off-by: Evan Quan Acked-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c239035cdf14..f66a33410ee8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4224,7 +4224,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, */ amdgpu_unregister_gpu_instance(tmp_adev); - amdgpu_fbdev_set_suspend(adev, 1); + amdgpu_fbdev_set_suspend(tmp_adev, 1); /* disable ras on ALL IPs */ if (!(in_ras_intr && !use_baco) && -- cgit v1.2.3 From 52fb44cf30fc6b11a2faf8d8d905e756ea24f919 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 16 Apr 2020 12:20:38 +0800 Subject: drm/amdgpu: correct cancel_delayed_work_sync on gpu reset As for XGMI setup, it should be performed on other devices from the hive also. Signed-off-by: Evan Quan Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f66a33410ee8..4be5187391c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4218,6 +4218,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_amdkfd_pre_reset(tmp_adev); } + cancel_delayed_work_sync(&tmp_adev->delayed_init_work); + /* * Mark these ASICs to be reseted as untracked first * And add them back after reset completed -- cgit v1.2.3 From 9e94d22c008585815f32630ee7d0d28c4ec12bb7 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 16 Apr 2020 12:27:28 +0800 Subject: drm/amdgpu: optimize the gpu reset for XGMI setup V2 This is basically just some code cosmetic. The current design for XGMI setup gput reset is to operate on current device(adev) first and then on other devices from the hive(by another 'for' loop). But actually we can do some sort to the device list(to put current device 1st position) and handle all the devices in a single 'for' loop. V2: added missing hive->hive_lock protection Signed-off-by: Evan Quan Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 76 ++++++++++-------------------- 1 file changed, 25 insertions(+), 51 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4be5187391c6..e75deb789e56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4152,16 +4152,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, } need_full_reset = job_signaled = false; - INIT_LIST_HEAD(&device_list); - - amdgpu_ras_set_error_query_ready(adev, false); dev_info(adev->dev, "GPU %s begin!\n", (in_ras_intr && !use_baco) ? "jobs stop":"reset"); - cancel_delayed_work_sync(&adev->delayed_init_work); - - hive = amdgpu_get_xgmi_hive(adev, false); + hive = amdgpu_get_xgmi_hive(adev, true); /* * Here we trylock to avoid chain of resets executing from @@ -4174,35 +4169,21 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (hive && !mutex_trylock(&hive->reset_lock)) { DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", job ? job->base.id : -1, hive->hive_id); + mutex_unlock(&hive->hive_lock); return 0; } - /* Start with adev pre asic reset first for soft reset check.*/ - if (!amdgpu_device_lock_adev(adev, !hive)) { - DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress", - job ? job->base.id : -1); - return 0; - } - - /* Block kfd: SRIOV would do it separately */ - if (!amdgpu_sriov_vf(adev)) - amdgpu_amdkfd_pre_reset(adev); - - /* Build list of devices to reset */ - if (adev->gmc.xgmi.num_physical_nodes > 1) { - if (!hive) { - /*unlock kfd: SRIOV would do it separately */ - if (!amdgpu_sriov_vf(adev)) - amdgpu_amdkfd_post_reset(adev); - amdgpu_device_unlock_adev(adev); + /* + * Build list of devices to reset. + * In case we are in XGMI hive mode, resort the device list + * to put adev in the 1st position. + */ + INIT_LIST_HEAD(&device_list); + if (adev->gmc.xgmi.num_physical_nodes > 1) { + if (!hive) return -ENODEV; - } - - /* - * In case we are in XGMI hive mode device reset is done for all the - * nodes in the hive to retrain all XGMI links and hence the reset - * sequence is executed in loop on all nodes. - */ + if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list)) + list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list); device_list_handle = &hive->device_list; } else { list_add_tail(&adev->gmc.xgmi.head, &device_list); @@ -4211,15 +4192,20 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* block all schedulers and reset given job's ring */ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - if (tmp_adev != adev) { - amdgpu_ras_set_error_query_ready(tmp_adev, false); - amdgpu_device_lock_adev(tmp_adev, false); - if (!amdgpu_sriov_vf(tmp_adev)) - amdgpu_amdkfd_pre_reset(tmp_adev); + if (!amdgpu_device_lock_adev(tmp_adev, !hive)) { + DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress", + job ? job->base.id : -1); + mutex_unlock(&hive->hive_lock); + return 0; } + amdgpu_ras_set_error_query_ready(tmp_adev, false); + cancel_delayed_work_sync(&tmp_adev->delayed_init_work); + if (!amdgpu_sriov_vf(tmp_adev)) + amdgpu_amdkfd_pre_reset(tmp_adev); + /* * Mark these ASICs to be reseted as untracked first * And add them back after reset completed @@ -4265,22 +4251,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, goto skip_hw_reset; } - - /* Guilty job will be freed after this*/ - r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset); - if (r) { - /*TODO Should we stop ?*/ - DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", - r, adev->ddev->unique); - adev->asic_reset_res = r; - } - retry: /* Rest of adevs pre asic reset from XGMI hive. */ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - - if (tmp_adev == adev) - continue; - r = amdgpu_device_pre_asic_reset(tmp_adev, NULL, &need_full_reset); @@ -4345,8 +4317,10 @@ skip_sched_resume: amdgpu_device_unlock_adev(tmp_adev); } - if (hive) + if (hive) { mutex_unlock(&hive->reset_lock); + mutex_unlock(&hive->hive_lock); + } if (r) dev_info(adev->dev, "GPU reset end with ret = %d\n", r); -- cgit v1.2.3 From 7dd8c205eaedfa3163c307143aaf29b65190e54a Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 16 Apr 2020 12:39:04 +0800 Subject: drm/amdgpu: code cleanup around gpu reset Make code more readable. Signed-off-by: Evan Quan Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e75deb789e56..1e4527c64279 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4130,7 +4130,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) { struct list_head device_list, *device_list_handle = NULL; - bool need_full_reset, job_signaled; + bool need_full_reset = false; + bool job_signaled = false; struct amdgpu_hive_info *hive = NULL; struct amdgpu_device *tmp_adev = NULL; int i, r = 0; @@ -4151,13 +4152,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, emergency_restart(); } - need_full_reset = job_signaled = false; - dev_info(adev->dev, "GPU %s begin!\n", (in_ras_intr && !use_baco) ? "jobs stop":"reset"); - hive = amdgpu_get_xgmi_hive(adev, true); - /* * Here we trylock to avoid chain of resets executing from * either trigger by jobs on different adevs in XGMI hive or jobs on @@ -4165,7 +4162,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * We always reset all schedulers for device and all devices for XGMI * hive so that should take care of them too. */ - + hive = amdgpu_get_xgmi_hive(adev, true); if (hive && !mutex_trylock(&hive->reset_lock)) { DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", job ? job->base.id : -1, hive->hive_id); @@ -4232,7 +4229,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, } } - if (in_ras_intr && !use_baco) goto skip_sched_resume; @@ -4243,10 +4239,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * job->base holds a reference to parent fence */ if (job && job->base.s_fence->parent && - dma_fence_is_signaled(job->base.s_fence->parent)) + dma_fence_is_signaled(job->base.s_fence->parent)) { job_signaled = true; - - if (job_signaled) { dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; } -- cgit v1.2.3 From a891d239f9e036031f9f1c62fe584232662cb7f1 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Wed, 22 Apr 2020 12:22:54 +0800 Subject: drm/amdgpu: set error query ready after all IPs late init If set error query ready in amdgpu_ras_late_init, which will cause some IP blocks aren't initialized, but their error query is ready. Signed-off-by: Dennis Li Reviewed-by: Guchun Chen Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 +----- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1e4527c64279..f9b315e7e004 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2216,6 +2216,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.late_initialized = true; } + amdgpu_ras_set_error_query_ready(adev, true); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 68b82f7b0b80..8b14aee370c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1921,10 +1921,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, } /* in resume phase, no need to create ras fs node */ - if (adev->in_suspend || adev->in_gpu_reset) { - amdgpu_ras_set_error_query_ready(adev, true); + if (adev->in_suspend || adev->in_gpu_reset) return 0; - } if (ih_info->cb) { r = amdgpu_ras_interrupt_add_handler(adev, ih_info); @@ -1936,8 +1934,6 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, if (r) goto sysfs; - amdgpu_ras_set_error_query_ready(adev, true); - return 0; cleanup: amdgpu_ras_sysfs_remove(adev, ras_block); -- cgit v1.2.3