diff options
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 23 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 18 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 18 | 
5 files changed, 32 insertions, 35 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 72f2a9aeaf0e..ba5e8635ca5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -951,7 +951,7 @@ struct amdgpu_device {  	atomic_t 			in_gpu_reset;  	enum pp_mp1_state               mp1_state; -	struct mutex  lock_reset; +	struct rw_semaphore reset_sem;  	struct amdgpu_doorbell_index doorbell_index;  	struct mutex			notifier_lock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 79b397800cbc..cc5c7f81c540 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -101,14 +101,18 @@ static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)  	file->private_data = adev; -	mutex_lock(&adev->lock_reset); +	ret = down_read_killable(&adev->reset_sem); +	if (ret) +		return ret; +  	if (adev->autodump.dumping.done) {  		reinit_completion(&adev->autodump.dumping);  		ret = 0;  	} else {  		ret = -EBUSY;  	} -	mutex_unlock(&adev->lock_reset); + +	up_read(&adev->reset_sem);  	return ret;  } @@ -1242,7 +1246,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)  	}  	/* Avoid accidently unparking the sched thread during GPU reset */ -	mutex_lock(&adev->lock_reset); +	r = down_read_killable(&adev->reset_sem); +	if (r) +		return r;  	/* hold on the scheduler */  	for (i = 0; i < AMDGPU_MAX_RINGS; i++) { @@ -1269,7 +1275,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)  		kthread_unpark(ring->sched.thread);  	} -	mutex_unlock(&adev->lock_reset); +	up_read(&adev->reset_sem);  	pm_runtime_mark_last_busy(dev->dev);  	pm_runtime_put_autosuspend(dev->dev); @@ -1459,7 +1465,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)  		return -ENOMEM;  	/* Avoid accidently unparking the sched thread during GPU reset */ -	mutex_lock(&adev->lock_reset); +	r = down_read_killable(&adev->reset_sem); +	if (r) +		goto pro_end;  	/* stop the scheduler */  	kthread_park(ring->sched.thread); @@ -1500,13 +1508,14 @@ failure:  	/* restart the scheduler */  	kthread_unpark(ring->sched.thread); -	mutex_unlock(&adev->lock_reset); +	up_read(&adev->reset_sem);  	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); +pro_end:  	kfree(fences); -	return 0; +	return r;  }  static int amdgpu_debugfs_sclk_set(void *data, u64 val) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 78fd2c9a7b7d..82242e2f5658 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3054,7 +3054,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,  	mutex_init(&adev->virt.vf_errors.lock);  	hash_init(adev->mn_hash);  	atomic_set(&adev->in_gpu_reset, 0); -	mutex_init(&adev->lock_reset); +	init_rwsem(&adev->reset_sem);  	mutex_init(&adev->psp.mutex);  	mutex_init(&adev->notifier_lock); @@ -4206,7 +4206,7 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev)  	if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)  		return false; -	mutex_lock(&adev->lock_reset); +	down_write(&adev->reset_sem);  	atomic_inc(&adev->gpu_reset_counter);  	switch (amdgpu_asic_reset_method(adev)) { @@ -4229,7 +4229,7 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)  	amdgpu_vf_error_trans_all(adev);  	adev->mp1_state = PP_MP1_STATE_NONE;  	atomic_set(&adev->in_gpu_reset, 0); -	mutex_unlock(&adev->lock_reset); +	up_write(&adev->reset_sem);  }  static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index f27d83f2de78..9c07014d9bd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -238,19 +238,15 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)  	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);  	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);  	int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT; -	int locked;  	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,  	 * otherwise the mailbox msg will be ruined/reseted by  	 * the VF FLR. -	 * -	 * we can unlock the lock_reset to allow "amdgpu_job_timedout" -	 * to run gpu_recover() after FLR_NOTIFICATION_CMPL received -	 * which means host side had finished this VF's FLR.  	 */ -	locked = mutex_trylock(&adev->lock_reset); -	if (locked) -		atomic_set(&adev->in_gpu_reset, 1); +	if (!down_read_trylock(&adev->reset_sem)) +		return; + +	atomic_set(&adev->in_gpu_reset, 1);  	do {  		if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) @@ -261,10 +257,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)  	} while (timeout > 1);  flr_done: -	if (locked) { -		atomic_set(&adev->in_gpu_reset, 0); -		mutex_unlock(&adev->lock_reset); -	} +	atomic_set(&adev->in_gpu_reset, 0); +	up_read(&adev->reset_sem);  	/* Trigger recovery for world switch failure if no TDR */  	if (amdgpu_device_should_recover_gpu(adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 3cb10ab943a6..9c23abf9b140 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -259,19 +259,15 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)  	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);  	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);  	int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT; -	int locked;  	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,  	 * otherwise the mailbox msg will be ruined/reseted by  	 * the VF FLR. -	 * -	 * we can unlock the lock_reset to allow "amdgpu_job_timedout" -	 * to run gpu_recover() after FLR_NOTIFICATION_CMPL received -	 * which means host side had finished this VF's FLR.  	 */ -	locked = mutex_trylock(&adev->lock_reset); -	if (locked) -		atomic_set(&adev->in_gpu_reset, 1); +	if (!down_read_trylock(&adev->reset_sem)) +		return; + +	atomic_set(&adev->in_gpu_reset, 1);  	do {  		if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) @@ -282,10 +278,8 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)  	} while (timeout > 1);  flr_done: -	if (locked) { -		atomic_set(&adev->in_gpu_reset, 0); -		mutex_unlock(&adev->lock_reset); -	} +	atomic_set(&adev->in_gpu_reset, 0); +	up_read(&adev->reset_sem);  	/* Trigger recovery for world switch failure if no TDR */  	if (amdgpu_device_should_recover_gpu(adev) | 
