diff options
| author | Hawking Zhang <Hawking.Zhang@amd.com> | 2023-03-13 14:18:34 +0800 | 
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2023-03-15 18:45:27 -0400 | 
| commit | fdc94d3a8c887e4e06a7ff8dcb51d55cd70e16cf (patch) | |
| tree | 0c5e36c606d8d27b6b92066bbbec16108c23d3bd | |
| parent | da9d669eab152dbd6e9410606a7c8c8a212a7959 (diff) | |
drm/amdgpu: Rework pcie_bif ras sw_init
The pcie_bif ras block needs to be initialized as early
as possible to handle fatal errors detected in the hw_init
phase. Also align the pcie_bif ras sw_init with the other
ras blocks.
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Stanley Yang <Stanley.Yang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c | 23 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 19 | 
3 files changed, 35 insertions, 8 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index 37d779b8e4a6..a3bc00577a7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -22,6 +22,29 @@  #include "amdgpu.h"  #include "amdgpu_ras.h" +int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev) +{ +	int err; +	struct amdgpu_nbio_ras *ras; + +	if (!adev->nbio.ras) +		return 0; + +	ras = adev->nbio.ras; +	err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); +	if (err) { +		dev_err(adev->dev, "Failed to register pcie_bif ras block!\n"); +		return err; +	} + +	strcpy(ras->ras_block.ras_comm.name, "pcie_bif"); +	ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__PCIE_BIF; +	ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; +	adev->nbio.ras_if = &ras->ras_block.ras_comm; + +	return 0; +} +  int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)  {  	int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index a240336bbc6b..c686ff4bcc39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -106,5 +106,6 @@ struct amdgpu_nbio {  	struct amdgpu_nbio_ras  *ras;  }; +int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev);  int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 63dfcc98152d..b0d050ffc200 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2554,21 +2554,24 @@ int amdgpu_ras_init(struct amdgpu_device *adev)  	/* initialize nbio ras function ahead of any other  	 * ras functions so hardware fatal error interrupt  	 * can be enabled as early as possible */ -	switch (adev->asic_type) { -	case CHIP_VEGA20: -	case CHIP_ARCTURUS: -	case CHIP_ALDEBARAN: -		if 
(!adev->gmc.xgmi.connected_to_cpu) { +	switch (adev->ip_versions[NBIO_HWIP][0]) { +	case IP_VERSION(7, 4, 0): +	case IP_VERSION(7, 4, 1): +	case IP_VERSION(7, 4, 4): +		if (!adev->gmc.xgmi.connected_to_cpu)  			adev->nbio.ras = &nbio_v7_4_ras; -			amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block); -			adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm; -		}  		break;  	default:  		/* nbio ras is not available */  		break;  	} +	/* nbio ras block needs to be enabled ahead of other ras blocks +	 * to handle fatal error */ +	r = amdgpu_nbio_ras_sw_init(adev); +	if (r) +		return r; +  	if (adev->nbio.ras &&  	    adev->nbio.ras->init_ras_controller_interrupt) {  		r = adev->nbio.ras->init_ras_controller_interrupt(adev); | 
