diff options
| author | Tao Zhou <tao.zhou1@amd.com> | 2022-10-17 18:31:20 +0800 | 
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2022-10-27 15:12:08 -0400 | 
| commit | ae45a18b80d9d0d29f0ecfc52fb4e7831671b299 (patch) | |
| tree | 5e4b92907103f8773af3f400ef26cb073608f5b1 /drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | |
| parent | 24b822928b5139b85ee9a818a65e343b7e3bb4fe (diff) | |
drm/amdgpu: add RAS poison handling for MCA
For MCA poison, if unmapping the queue fails, only a GPU reset should be
triggered, without page retirement handling; the MCA notifier will do that.
v2: handle MCA poison consumption in umc_poison_handler directly.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 31 | 
1 files changed, 20 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 3c83129f4090..758942150c09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -169,19 +169,28 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,  		void *ras_error_status,  		bool reset)  { -	int ret; -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; -	struct ras_common_if head = { -		.block = AMDGPU_RAS_BLOCK__UMC, -	}; -	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); +	int ret = AMDGPU_RAS_SUCCESS; -	ret = -		amdgpu_umc_do_page_retirement(adev, ras_error_status, NULL, reset); +	if (!adev->gmc.xgmi.connected_to_cpu) { +		struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; +		struct ras_common_if head = { +			.block = AMDGPU_RAS_BLOCK__UMC, +		}; +		struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); -	if (ret == AMDGPU_RAS_SUCCESS && obj) { -		obj->err_data.ue_count += err_data->ue_count; -		obj->err_data.ce_count += err_data->ce_count; +		ret = +			amdgpu_umc_do_page_retirement(adev, ras_error_status, NULL, reset); + +		if (ret == AMDGPU_RAS_SUCCESS && obj) { +			obj->err_data.ue_count += err_data->ue_count; +			obj->err_data.ce_count += err_data->ce_count; +		} +	} else if (reset) { +		/* MCA poison handler is only responsible for GPU reset, +		 * let MCA notifier do page retirement. +		 */ +		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); +		amdgpu_ras_reset_gpu(adev);  	}  	return ret;  | 
