From f75e94d86829e92a758a26fc5bbdb4c9eba86260 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Tue, 4 Aug 2020 15:00:53 +0800 Subject: drm/amdgpu: bypass querying ras error count registers Once ras recovery is issued by ras sync flood interrupt or ras controller interrupt, add this guard to bypass or execute ras error count register harvest of all IPs. Signed-off-by: Guchun Chen Reviewed-by: Hawking Zhang Reviewed-by: Dennis Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 1a55f6f492fd..fbe464c68e5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1547,17 +1547,19 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct list_head device_list, *device_list_handle = NULL; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, false); - /* Build list of devices to query RAS related errors */ - if (hive && adev->gmc.xgmi.num_physical_nodes > 1) - device_list_handle = &hive->device_list; - else { - INIT_LIST_HEAD(&device_list); - list_add_tail(&adev->gmc.xgmi.head, &device_list); - device_list_handle = &device_list; - } + if (!ras->disable_ras_err_cnt_harvest) { + /* Build list of devices to query RAS related errors */ + if (hive && adev->gmc.xgmi.num_physical_nodes > 1) { + device_list_handle = &hive->device_list; + } else { + INIT_LIST_HEAD(&device_list); + list_add_tail(&adev->gmc.xgmi.head, &device_list); + device_list_handle = &device_list; + } - list_for_each_entry(remote_adev, device_list_handle, gmc.xgmi.head) { - amdgpu_ras_log_on_err_counter(remote_adev); + list_for_each_entry(remote_adev, + device_list_handle, gmc.xgmi.head) + amdgpu_ras_log_on_err_counter(remote_adev); } if (amdgpu_device_should_recover_gpu(ras->adev)) -- cgit v1.2.3