From 528e083d85bd0306e056fe1bdfd05493ebbff9cc Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 13 Jun 2018 14:55:20 -0500 Subject: drm/amdgpu: rename rmn to amn in the MMU notifier code (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just a copy&paste leftover from radeon. v2: rebase (Alex) Signed-off-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 140 ++++++++++++++++----------------- 1 file changed, 70 insertions(+), 70 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 83e344fbb50a..37570a1c6db8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -64,7 +64,7 @@ struct amdgpu_mn_node { }; /** - * amdgpu_mn_destroy - destroy the rmn + * amdgpu_mn_destroy - destroy the amn * * @work: previously sheduled work item * @@ -72,26 +72,26 @@ struct amdgpu_mn_node { */ static void amdgpu_mn_destroy(struct work_struct *work) { - struct amdgpu_mn *rmn = container_of(work, struct amdgpu_mn, work); - struct amdgpu_device *adev = rmn->adev; + struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work); + struct amdgpu_device *adev = amn->adev; struct amdgpu_mn_node *node, *next_node; struct amdgpu_bo *bo, *next_bo; mutex_lock(&adev->mn_lock); - down_write(&rmn->lock); - hash_del(&rmn->node); + down_write(&amn->lock); + hash_del(&amn->node); rbtree_postorder_for_each_entry_safe(node, next_node, - &rmn->objects.rb_root, it.rb) { + &amn->objects.rb_root, it.rb) { list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { bo->mn = NULL; list_del_init(&bo->mn_list); } kfree(node); } - up_write(&rmn->lock); + up_write(&amn->lock); mutex_unlock(&adev->mn_lock); - mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm); - kfree(rmn); + mmu_notifier_unregister_no_release(&amn->mn, amn->mm); + kfree(amn); } /** @@ -105,9 +105,9 @@ static void amdgpu_mn_destroy(struct work_struct *work) static void amdgpu_mn_release(struct mmu_notifier *mn, struct mm_struct *mm) { - struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); - INIT_WORK(&rmn->work, amdgpu_mn_destroy); - schedule_work(&rmn->work); + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + INIT_WORK(&amn->work, amdgpu_mn_destroy); + schedule_work(&amn->work); } @@ -130,31 +130,31 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) } /** - * amdgpu_mn_read_lock - take the rmn read lock + * amdgpu_mn_read_lock - take the amn read lock * - * @rmn: our notifier + * @amn: our notifier * - * Take the rmn read side lock. + * Take the amn read side lock. */ -static void amdgpu_mn_read_lock(struct amdgpu_mn *rmn) +static void amdgpu_mn_read_lock(struct amdgpu_mn *amn) { - mutex_lock(&rmn->read_lock); - if (atomic_inc_return(&rmn->recursion) == 1) - down_read_non_owner(&rmn->lock); - mutex_unlock(&rmn->read_lock); + mutex_lock(&amn->read_lock); + if (atomic_inc_return(&amn->recursion) == 1) + down_read_non_owner(&amn->lock); + mutex_unlock(&amn->read_lock); } /** - * amdgpu_mn_read_unlock - drop the rmn read lock + * amdgpu_mn_read_unlock - drop the amn read lock * - * @rmn: our notifier + * @amn: our notifier * - * Drop the rmn read side lock. + * Drop the amn read side lock. */ -static void amdgpu_mn_read_unlock(struct amdgpu_mn *rmn) +static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) { - if (atomic_dec_return(&rmn->recursion) == 0) - up_read_non_owner(&rmn->lock); + if (atomic_dec_return(&amn->recursion) == 0) + up_read_non_owner(&amn->lock); } /** @@ -202,15 +202,15 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, unsigned long start, unsigned long end) { - struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); struct interval_tree_node *it; /* notification is exclusive, but interval is inclusive */ end -= 1; - amdgpu_mn_read_lock(rmn); + amdgpu_mn_read_lock(amn); - it = interval_tree_iter_first(&rmn->objects, start, end); + it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; @@ -238,15 +238,15 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, unsigned long start, unsigned long end) { - struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); struct interval_tree_node *it; /* notification is exclusive, but interval is inclusive */ end -= 1; - amdgpu_mn_read_lock(rmn); + amdgpu_mn_read_lock(amn); - it = interval_tree_iter_first(&rmn->objects, start, end); + it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; struct amdgpu_bo *bo; @@ -279,9 +279,9 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, unsigned long start, unsigned long end) { - struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); - amdgpu_mn_read_unlock(rmn); + amdgpu_mn_read_unlock(amn); } static const struct mmu_notifier_ops amdgpu_mn_ops[] = { @@ -315,7 +315,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, enum amdgpu_mn_type type) { struct mm_struct *mm = current->mm; - struct amdgpu_mn *rmn; + struct amdgpu_mn *amn; unsigned long key = AMDGPU_MN_KEY(mm, type); int r; @@ -325,41 +325,41 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, return ERR_PTR(-EINTR); } - hash_for_each_possible(adev->mn_hash, rmn, node, key) - if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key) + hash_for_each_possible(adev->mn_hash, amn, node, key) + if (AMDGPU_MN_KEY(amn->mm, amn->type) == key) goto release_locks; - rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); - if (!rmn) { - rmn = ERR_PTR(-ENOMEM); + amn = kzalloc(sizeof(*amn), GFP_KERNEL); + if (!amn) { + amn = ERR_PTR(-ENOMEM); goto release_locks; } - rmn->adev = adev; - rmn->mm = mm; - init_rwsem(&rmn->lock); - rmn->type = type; - rmn->mn.ops = &amdgpu_mn_ops[type]; - rmn->objects = RB_ROOT_CACHED; - mutex_init(&rmn->read_lock); - atomic_set(&rmn->recursion, 0); + amn->adev = adev; + amn->mm = mm; + init_rwsem(&amn->lock); + amn->type = type; + amn->mn.ops = &amdgpu_mn_ops[type]; + amn->objects = RB_ROOT_CACHED; + mutex_init(&amn->read_lock); + atomic_set(&amn->recursion, 0); - r = __mmu_notifier_register(&rmn->mn, mm); + r = __mmu_notifier_register(&amn->mn, mm); if (r) - goto free_rmn; + goto free_amn; - hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type)); + hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type)); release_locks: up_write(&mm->mmap_sem); mutex_unlock(&adev->mn_lock); - return rmn; + return amn; -free_rmn: +free_amn: up_write(&mm->mmap_sem); mutex_unlock(&adev->mn_lock); - kfree(rmn); + kfree(amn); return ERR_PTR(r); } @@ -379,14 +379,14 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); enum amdgpu_mn_type type = bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; - struct amdgpu_mn *rmn; + struct amdgpu_mn *amn; struct amdgpu_mn_node *node = NULL, *new_node; struct list_head bos; struct interval_tree_node *it; - rmn = amdgpu_mn_get(adev, type); - if (IS_ERR(rmn)) - return PTR_ERR(rmn); + amn = amdgpu_mn_get(adev, type); + if (IS_ERR(amn)) + return PTR_ERR(amn); new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); if (!new_node) @@ -394,12 +394,12 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) INIT_LIST_HEAD(&bos); - down_write(&rmn->lock); + down_write(&amn->lock); - while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) { + while ((it = interval_tree_iter_first(&amn->objects, addr, end))) { kfree(node); node = container_of(it, struct amdgpu_mn_node, it); - interval_tree_remove(&node->it, &rmn->objects); + interval_tree_remove(&node->it, &amn->objects); addr = min(it->start, addr); end = max(it->last, end); list_splice(&node->bos, &bos); @@ -410,7 +410,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) else kfree(new_node); - bo->mn = rmn; + bo->mn = amn; node->it.start = addr; node->it.last = end; @@ -418,9 +418,9 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) list_splice(&bos, &node->bos); list_add(&bo->mn_list, &node->bos); - interval_tree_insert(&node->it, &rmn->objects); + interval_tree_insert(&node->it, &amn->objects); - up_write(&rmn->lock); + up_write(&amn->lock); return 0; } @@ -435,18 +435,18 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) void amdgpu_mn_unregister(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_mn *rmn; + struct amdgpu_mn *amn; struct list_head *head; mutex_lock(&adev->mn_lock); - rmn = bo->mn; - if (rmn == NULL) { + amn = bo->mn; + if (amn == NULL) { mutex_unlock(&adev->mn_lock); return; } - down_write(&rmn->lock); + down_write(&amn->lock); /* save the next list entry for later */ head = bo->mn_list.next; @@ -457,11 +457,11 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) if (list_empty(head)) { struct amdgpu_mn_node *node; node = container_of(head, struct amdgpu_mn_node, bos); - interval_tree_remove(&node->it, &rmn->objects); + interval_tree_remove(&node->it, &amn->objects); kfree(node); } - up_write(&rmn->lock); + up_write(&amn->lock); mutex_unlock(&adev->mn_lock); } -- cgit v1.2.3 From ad7f0b6334fe3cf52f2d79345791a4ef4547353f Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 5 Jun 2018 11:47:43 +0200 Subject: drm/amdgpu: fix documentation of amdgpu_mn.c v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And wire it up as well. v2: improve the wording, fix label mismatch Signed-off-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu.rst | 9 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 74 ++++++++++++++++++++++++++-------- 2 files changed, 67 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c') diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst index 1fbf3876a3d8..a4852f9a6141 100644 --- a/Documentation/gpu/amdgpu.rst +++ b/Documentation/gpu/amdgpu.rst @@ -35,3 +35,12 @@ PRIME Buffer Sharing .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c :internal: + +MMU Notifier +------------ + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c + :doc: MMU Notifier + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c + :internal: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 37570a1c6db8..40fcb2af2914 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -28,6 +28,21 @@ * Christian König */ +/** + * DOC: MMU Notifier + * + * For coherent userptr handling registers an MMU notifier to inform the driver + * about updates on the page tables of a process. + * + * When somebody tries to invalidate the page tables we block the update until + * all operations on the pages in question are completed, then those pages are + * marked as accessed and also dirty if it wasn't a read only access. + * + * New command submissions using the userptrs in question are delayed until all + * page table invalidation are completed and we once more see a coherent process + * address space. + */ + #include #include #include @@ -38,6 +53,21 @@ #include "amdgpu.h" #include "amdgpu_amdkfd.h" +/** + * struct amdgpu_mn + * + * @adev: amdgpu device pointer + * @mm: process address space + * @mn: MMU notifier structur + * @work: destrution work item + * @node: hash table node to find structure by adev and mn + * @lock: rw semaphore protecting the notifier nodes + * @objects: interval tree containing amdgpu_mn_nodes + * @read_lock: mutex for recursive locking of @lock + * @recursion: depth of recursion + * + * Data for each amdgpu device and process address space. + */ struct amdgpu_mn { /* constant after initialisation */ struct amdgpu_device *adev; @@ -58,13 +88,21 @@ struct amdgpu_mn { atomic_t recursion; }; +/** + * struct amdgpu_mn_node + * + * @it: interval node defining start-last of the affected address range + * @bos: list of all BOs in the affected address range + * + * Manages all BOs which are affected of a certain range of address space. + */ struct amdgpu_mn_node { struct interval_tree_node it; struct list_head bos; }; /** - * amdgpu_mn_destroy - destroy the amn + * amdgpu_mn_destroy - destroy the MMU notifier * * @work: previously sheduled work item * @@ -98,7 +136,7 @@ static void amdgpu_mn_destroy(struct work_struct *work) * amdgpu_mn_release - callback to notify about mm destruction * * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about * * Shedule a work item to lazy destroy our notifier. */ @@ -106,13 +144,16 @@ static void amdgpu_mn_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + INIT_WORK(&amn->work, amdgpu_mn_destroy); schedule_work(&amn->work); } /** - * amdgpu_mn_lock - take the write side lock for this mn + * amdgpu_mn_lock - take the write side lock for this notifier + * + * @mn: our notifier */ void amdgpu_mn_lock(struct amdgpu_mn *mn) { @@ -121,7 +162,9 @@ void amdgpu_mn_lock(struct amdgpu_mn *mn) } /** - * amdgpu_mn_unlock - drop the write side lock for this mn + * amdgpu_mn_unlock - drop the write side lock for this notifier + * + * @mn: our notifier */ void amdgpu_mn_unlock(struct amdgpu_mn *mn) { @@ -130,11 +173,9 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) } /** - * amdgpu_mn_read_lock - take the amn read lock + * amdgpu_mn_read_lock - take the read side lock for this notifier * * @amn: our notifier - * - * Take the amn read side lock. */ static void amdgpu_mn_read_lock(struct amdgpu_mn *amn) { @@ -145,11 +186,9 @@ static void amdgpu_mn_read_lock(struct amdgpu_mn *amn) } /** - * amdgpu_mn_read_unlock - drop the amn read lock + * amdgpu_mn_read_unlock - drop the read side lock for this notifier * * @amn: our notifier - * - * Drop the amn read side lock. */ static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) { @@ -161,9 +200,11 @@ static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) * amdgpu_mn_invalidate_node - unmap all BOs of a node * * @node: the node with the BOs to unmap + * @start: start of address range affected + * @end: end of address range affected * - * We block for all BOs and unmap them by move them - * into system domain again. + * Block for operations on BOs to finish and mark pages as accessed and + * potentially dirty. */ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, unsigned long start, @@ -190,12 +231,12 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change * * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about * @start: start of updated range * @end: end of updated range * - * We block for all BOs between start and end to be idle and - * unmap them by move them into system domain again. + * Block for operations on BOs to finish and mark pages as accessed and + * potentially dirty. */ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, struct mm_struct *mm, @@ -268,7 +309,7 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, * amdgpu_mn_invalidate_range_end - callback to notify about mm change * * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about * @start: start of updated range * @end: end of updated range * @@ -456,6 +497,7 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) if (list_empty(head)) { struct amdgpu_mn_node *node; + node = container_of(head, struct amdgpu_mn_node, bos); interval_tree_remove(&node->it, &amn->objects); kfree(node); -- cgit v1.2.3 From f4557923b5d3bd5aaa4177ea184c4389175cc92f Mon Sep 17 00:00:00 2001 From: Slava Abramov Date: Wed, 13 Jun 2018 10:50:31 -0400 Subject: drm/amdgpu: fix typo in amdgpu_mn.c comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In doc comments for struct amdgpu_mn: destrution -> destruction Signed-off-by: Slava Abramov Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 40fcb2af2914..72a3e8c68876 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -59,7 +59,7 @@ * @adev: amdgpu device pointer * @mm: process address space * @mn: MMU notifier structur - * @work: destrution work item + * @work: destruction work item * @node: hash table node to find structure by adev and mn * @lock: rw semaphore protecting the notifier nodes * @objects: interval tree containing amdgpu_mn_nodes -- cgit v1.2.3 From 87e3f1366eaa82c78b826b38008987406470b03d Mon Sep 17 00:00:00 2001 From: Darren Powell Date: Mon, 25 Jun 2018 19:04:03 -0400 Subject: drm/amd: Remove errors from sphinx documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminating the warnings produced by sphinx when processing the sphinx comments in amdgpu_device.c & amdgpu_mn.c Signed-off-by: Darren Powell Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 ++++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 5 +++-- 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d43abbd2a3cc..9883fa9bb41b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1076,7 +1076,7 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { /** * amdgpu_device_ip_set_clockgating_state - set the CG state * - * @adev: amdgpu_device pointer + * @dev: amdgpu_device pointer * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) * @state: clockgating state (gate or ungate) * @@ -1110,7 +1110,7 @@ int amdgpu_device_ip_set_clockgating_state(void *dev, /** * amdgpu_device_ip_set_powergating_state - set the PG state * - * @adev: amdgpu_device pointer + * @dev: amdgpu_device pointer * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) * @state: powergating state (gate or ungate) * @@ -1221,7 +1221,7 @@ bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, * amdgpu_device_ip_get_ip_block - get a hw IP pointer * * @adev: amdgpu_device pointer - * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) + * @type: Type of hardware IP (SMU, GFX, UVD, etc.) * * Returns a pointer to the hardware IP block structure * if it exists for the asic, otherwise NULL. @@ -2229,7 +2229,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) * amdgpu_device_init - initialize the driver * * @adev: amdgpu_device pointer - * @pdev: drm dev pointer + * @ddev: drm dev pointer * @pdev: pci dev pointer * @flags: driver flags * @@ -2602,8 +2602,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev) /** * amdgpu_device_suspend - initiate device suspend * - * @pdev: drm dev pointer - * @state: suspend state + * @dev: drm dev pointer + * @suspend: suspend state + * @fbcon : notify the fbdev of suspend * * Puts the hw in the suspend state (all asics). * Returns 0 for success or an error on failure. @@ -2701,7 +2702,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) /** * amdgpu_device_resume - initiate device resume * - * @pdev: drm dev pointer + * @dev: drm dev pointer + * @resume: resume state + * @fbcon : notify the fbdev of resume * * Bring the hw back to operating state (all asics). * Returns 0 for success or an error on failure. @@ -3164,6 +3167,7 @@ out: * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * * @adev: amdgpu device pointer + * @from_hypervisor: request from hypervisor * * do VF FLR and reinitialize Asic * return 0 means successed otherwise failed @@ -3211,7 +3215,7 @@ error: * * @adev: amdgpu device pointer * @job: which job trigger hang - * @force forces reset regardless of amdgpu_gpu_recovery + * @force: forces reset regardless of amdgpu_gpu_recovery * * Attempt to reset the GPU if it has hung (all asics). * Returns 0 for success or an error on failure. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 72a3e8c68876..a365ea2383d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -58,7 +58,8 @@ * * @adev: amdgpu device pointer * @mm: process address space - * @mn: MMU notifier structur + * @mn: MMU notifier structure + * @type: type of MMU notifier * @work: destruction work item * @node: hash table node to find structure by adev and mn * @lock: rw semaphore protecting the notifier nodes @@ -266,7 +267,7 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change * * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about * @start: start of updated range * @end: end of updated range * -- cgit v1.2.3 From 93065ac753e4443840a057bfef4be71ec766fde9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 21 Aug 2018 21:52:33 -0700 Subject: mm, oom: distinguish blockable mode for mmu notifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are several blockable mmu notifiers which might sleep in mmu_notifier_invalidate_range_start and that is a problem for the oom_reaper because it needs to guarantee a forward progress so it cannot depend on any sleepable locks. Currently we simply back off and mark an oom victim with blockable mmu notifiers as done after a short sleep. That can result in selecting a new oom victim prematurely because the previous one still hasn't torn its memory down yet. We can do much better though. Even if mmu notifiers use sleepable locks there is no reason to automatically assume those locks are held. Moreover majority of notifiers only care about a portion of the address space and there is absolutely zero reason to fail when we are unmapping an unrelated range. Many notifiers do really block and wait for HW which is harder to handle and we have to bail out though. This patch handles the low hanging fruit. __mmu_notifier_invalidate_range_start gets a blockable flag and callbacks are not allowed to sleep if the flag is set to false. This is achieved by using trylock instead of the sleepable lock for most callbacks and continue as long as we do not block down the call chain. I think we can improve that even further because there is a common pattern to do a range lookup first and then do something about that. The first part can be done without a sleeping lock in most cases AFAICS. The oom_reaper end then simply retries if there is at least one notifier which couldn't make any progress in !blockable mode. A retry loop is already implemented to wait for the mmap_sem and this is basically the same thing. The simplest way for driver developers to test this code path is to wrap userspace code which uses these notifiers into a memcg and set the hard limit to hit the oom. This can be done e.g. after the test faults in all the mmu notifier managed memory and set the hard limit to something really small. Then we are looking for a proper process tear down. [akpm@linux-foundation.org: coding style fixes] [akpm@linux-foundation.org: minor code simplification] Link: http://lkml.kernel.org/r/20180716115058.5559-1-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Christian König # AMD notifiers Acked-by: Leon Romanovsky # mlx and umem_odp Reported-by: David Rientjes Cc: "David (ChunMing) Zhou" Cc: Paolo Bonzini Cc: Alex Deucher Cc: David Airlie Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Doug Ledford Cc: Jason Gunthorpe Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Sudeep Dutt Cc: Ashutosh Dixit Cc: Dimitri Sivanich Cc: Boris Ostrovsky Cc: Juergen Gross Cc: "Jérôme Glisse" Cc: Andrea Arcangeli Cc: Felix Kuehling Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kvm/x86.c | 7 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 43 ++++++++++++++++++++++++++------ drivers/gpu/drm/i915/i915_gem_userptr.c | 13 +++++++--- drivers/gpu/drm/radeon/radeon_mn.c | 22 ++++++++++++++--- drivers/infiniband/core/umem_odp.c | 33 +++++++++++++++++++------ drivers/infiniband/hw/hfi1/mmu_rb.c | 11 ++++++--- drivers/infiniband/hw/mlx5/odp.c | 2 +- drivers/misc/mic/scif/scif_dma.c | 7 ++++-- drivers/misc/sgi-gru/grutlbpurge.c | 7 ++++-- drivers/xen/gntdev.c | 44 ++++++++++++++++++++++++++------- include/linux/kvm_host.h | 4 +-- include/linux/mmu_notifier.h | 33 +++++++++++++++++++------ include/linux/oom.h | 2 +- include/rdma/ib_umem_odp.h | 3 ++- mm/hmm.c | 7 ++++-- mm/mmap.c | 2 +- mm/mmu_notifier.c | 19 +++++++++++--- mm/oom_kill.c | 29 +++++++++++----------- virt/kvm/kvm_main.c | 15 +++++++---- 19 files changed, 223 insertions(+), 80 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f7dff0457846..4a74a7cf0a8b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7305,8 +7305,9 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); } -void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end) +int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end, + bool blockable) { unsigned long apic_address; @@ -7317,6 +7318,8 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); if (start <= apic_address && apic_address < end) kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); + + return 0; } void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index a365ea2383d1..e55508b39496 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -178,12 +178,18 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) * * @amn: our notifier */ -static void amdgpu_mn_read_lock(struct amdgpu_mn *amn) +static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) { - mutex_lock(&amn->read_lock); + if (blockable) + mutex_lock(&amn->read_lock); + else if (!mutex_trylock(&amn->read_lock)) + return -EAGAIN; + if (atomic_inc_return(&amn->recursion) == 1) down_read_non_owner(&amn->lock); mutex_unlock(&amn->read_lock); + + return 0; } /** @@ -239,10 +245,11 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. */ -static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, +static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, - unsigned long end) + unsigned long end, + bool blockable) { struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); struct interval_tree_node *it; @@ -250,17 +257,28 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, /* notification is exclusive, but interval is inclusive */ end -= 1; - amdgpu_mn_read_lock(amn); + /* TODO we should be able to split locking for interval tree and + * amdgpu_mn_invalidate_node + */ + if (amdgpu_mn_read_lock(amn, blockable)) + return -EAGAIN; it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; + if (!blockable) { + amdgpu_mn_read_unlock(amn); + return -EAGAIN; + } + node = container_of(it, struct amdgpu_mn_node, it); it = interval_tree_iter_next(it, start, end); amdgpu_mn_invalidate_node(node, start, end); } + + return 0; } /** @@ -275,10 +293,11 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, * necessitates evicting all user-mode queues of the process. The BOs * are restorted in amdgpu_mn_invalidate_range_end_hsa. */ -static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, +static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, - unsigned long end) + unsigned long end, + bool blockable) { struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); struct interval_tree_node *it; @@ -286,13 +305,19 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, /* notification is exclusive, but interval is inclusive */ end -= 1; - amdgpu_mn_read_lock(amn); + if (amdgpu_mn_read_lock(amn, blockable)) + return -EAGAIN; it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; struct amdgpu_bo *bo; + if (!blockable) { + amdgpu_mn_read_unlock(amn); + return -EAGAIN; + } + node = container_of(it, struct amdgpu_mn_node, it); it = interval_tree_iter_next(it, start, end); @@ -304,6 +329,8 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, amdgpu_amdkfd_evict_userptr(mem, mm); } } + + return 0; } /** diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index dcd6e230d16a..2c9b284036d1 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -112,10 +112,11 @@ static void del_object(struct i915_mmu_object *mo) mo->attached = false; } -static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, +static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, struct mm_struct *mm, unsigned long start, - unsigned long end) + unsigned long end, + bool blockable) { struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn); @@ -124,7 +125,7 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, LIST_HEAD(cancelled); if (RB_EMPTY_ROOT(&mn->objects.rb_root)) - return; + return 0; /* interval ranges are inclusive, but invalidate range is exclusive */ end--; @@ -132,6 +133,10 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, spin_lock(&mn->lock); it = interval_tree_iter_first(&mn->objects, start, end); while (it) { + if (!blockable) { + spin_unlock(&mn->lock); + return -EAGAIN; + } /* The mmu_object is released late when destroying the * GEM object so it is entirely possible to gain a * reference on an object in the process of being freed @@ -154,6 +159,8 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, if (!list_empty(&cancelled)) flush_workqueue(mn->wq); + + return 0; } static const struct mmu_notifier_ops i915_gem_userptr_notifier = { diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index abd24975c9b1..f8b35df44c60 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -118,19 +118,27 @@ static void radeon_mn_release(struct mmu_notifier *mn, * We block for all BOs between start and end to be idle and * unmap them by move them into system domain again. */ -static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn, +static int radeon_mn_invalidate_range_start(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, - unsigned long end) + unsigned long end, + bool blockable) { struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn); struct ttm_operation_ctx ctx = { false, false }; struct interval_tree_node *it; + int ret = 0; /* notification is exclusive, but interval is inclusive */ end -= 1; - mutex_lock(&rmn->lock); + /* TODO we should be able to split locking for interval tree and + * the tear down. + */ + if (blockable) + mutex_lock(&rmn->lock); + else if (!mutex_trylock(&rmn->lock)) + return -EAGAIN; it = interval_tree_iter_first(&rmn->objects, start, end); while (it) { @@ -138,6 +146,11 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn, struct radeon_bo *bo; long r; + if (!blockable) { + ret = -EAGAIN; + goto out_unlock; + } + node = container_of(it, struct radeon_mn_node, it); it = interval_tree_iter_next(it, start, end); @@ -166,7 +179,10 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn, } } +out_unlock: mutex_unlock(&rmn->lock); + + return ret; } static const struct mmu_notifier_ops radeon_mn_ops = { diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 182436b92ba9..6ec748eccff7 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -186,6 +186,7 @@ static void ib_umem_notifier_release(struct mmu_notifier *mn, rbt_ib_umem_for_each_in_range(&context->umem_tree, 0, ULLONG_MAX, ib_umem_notifier_release_trampoline, + true, NULL); up_read(&context->umem_rwsem); } @@ -207,22 +208,31 @@ static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start, return 0; } -static void ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, +static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, - unsigned long end) + unsigned long end, + bool blockable) { struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); + int ret; if (!context->invalidate_range) - return; + return 0; + + if (blockable) + down_read(&context->umem_rwsem); + else if (!down_read_trylock(&context->umem_rwsem)) + return -EAGAIN; ib_ucontext_notifier_start_account(context); - down_read(&context->umem_rwsem); - rbt_ib_umem_for_each_in_range(&context->umem_tree, start, + ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start, end, - invalidate_range_start_trampoline, NULL); + invalidate_range_start_trampoline, + blockable, NULL); up_read(&context->umem_rwsem); + + return ret; } static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start, @@ -242,10 +252,15 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn, if (!context->invalidate_range) return; + /* + * TODO: we currently bail out if there is any sleepable work to be done + * in ib_umem_notifier_invalidate_range_start so we shouldn't really block + * here. But this is ugly and fragile. + */ down_read(&context->umem_rwsem); rbt_ib_umem_for_each_in_range(&context->umem_tree, start, end, - invalidate_range_end_trampoline, NULL); + invalidate_range_end_trampoline, true, NULL); up_read(&context->umem_rwsem); ib_ucontext_notifier_end_account(context); } @@ -798,6 +813,7 @@ EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, u64 start, u64 last, umem_call_back cb, + bool blockable, void *cookie) { int ret_val = 0; @@ -809,6 +825,9 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, for (node = rbt_ib_umem_iter_first(root, start, last - 1); node; node = next) { + /* TODO move the blockable decision up to the callback */ + if (!blockable) + return -EAGAIN; next = rbt_ib_umem_iter_next(node, start, last - 1); umem = container_of(node, struct ib_umem_odp, interval_tree); ret_val = cb(umem->umem, start, last, cookie) || ret_val; diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 70aceefe14d5..e1c7996c018e 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -67,9 +67,9 @@ struct mmu_rb_handler { static unsigned long mmu_node_start(struct mmu_rb_node *); static unsigned long mmu_node_last(struct mmu_rb_node *); -static void mmu_notifier_range_start(struct mmu_notifier *, +static int mmu_notifier_range_start(struct mmu_notifier *, struct mm_struct *, - unsigned long, unsigned long); + unsigned long, unsigned long, bool); static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, unsigned long, unsigned long); static void do_remove(struct mmu_rb_handler *handler, @@ -284,10 +284,11 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, handler->ops->remove(handler->ops_arg, node); } -static void mmu_notifier_range_start(struct mmu_notifier *mn, +static int mmu_notifier_range_start(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, - unsigned long end) + unsigned long end, + bool blockable) { struct mmu_rb_handler *handler = container_of(mn, struct mmu_rb_handler, mn); @@ -313,6 +314,8 @@ static void mmu_notifier_range_start(struct mmu_notifier *mn, if (added) queue_work(handler->wq, &handler->del_work); + + return 0; } /* diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index f1a87a690a4c..d216e0d2921d 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -488,7 +488,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) down_read(&ctx->umem_rwsem); rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX, - mr_leaf_free, imr); + mr_leaf_free, true, imr); up_read(&ctx->umem_rwsem); wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c index 63d6246d6dff..6369aeaa7056 100644 --- a/drivers/misc/mic/scif/scif_dma.c +++ b/drivers/misc/mic/scif/scif_dma.c @@ -200,15 +200,18 @@ static void scif_mmu_notifier_release(struct mmu_notifier *mn, schedule_work(&scif_info.misc_work); } -static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, +static int scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, - unsigned long end) + unsigned long end, + bool blockable) { struct scif_mmu_notif *mmn; mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier); scif_rma_destroy_tcw(mmn, start, end - start); + + return 0; } static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c index a3454eb56fbf..be28f05bfafa 100644 --- a/drivers/misc/sgi-gru/grutlbpurge.c +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -219,9 +219,10 @@ void gru_flush_all_tlb(struct gru_state *gru) /* * MMUOPS notifier callout functions */ -static void gru_invalidate_range_start(struct mmu_notifier *mn, +static int gru_invalidate_range_start(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, + bool blockable) { struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, ms_notifier); @@ -231,6 +232,8 @@ static void gru_invalidate_range_start(struct mmu_notifier *mn, gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms, start, end, atomic_read(&gms->ms_range_active)); gru_flush_tlb_range(gms, start, end - start); + + return 0; } static void gru_invalidate_range_end(struct mmu_notifier *mn, diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index c866a62f766d..57390c7666e5 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -479,18 +479,25 @@ static const struct vm_operations_struct gntdev_vmops = { /* ------------------------------------------------------------------ */ +static bool in_range(struct gntdev_grant_map *map, + unsigned long start, unsigned long end) +{ + if (!map->vma) + return false; + if (map->vma->vm_start >= end) + return false; + if (map->vma->vm_end <= start) + return false; + + return true; +} + static void unmap_if_in_range(struct gntdev_grant_map *map, unsigned long start, unsigned long end) { unsigned long mstart, mend; int err; - if (!map->vma) - return; - if (map->vma->vm_start >= end) - return; - if (map->vma->vm_end <= start) - return; mstart = max(start, map->vma->vm_start); mend = min(end, map->vma->vm_end); pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", @@ -503,21 +510,40 @@ static void unmap_if_in_range(struct gntdev_grant_map *map, WARN_ON(err); } -static void mn_invl_range_start(struct mmu_notifier *mn, +static int mn_invl_range_start(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, + bool blockable) { struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); struct gntdev_grant_map *map; + int ret = 0; + + /* TODO do we really need a mutex here? */ + if (blockable) + mutex_lock(&priv->lock); + else if (!mutex_trylock(&priv->lock)) + return -EAGAIN; - mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { + if (in_range(map, start, end)) { + ret = -EAGAIN; + goto out_unlock; + } unmap_if_in_range(map, start, end); } list_for_each_entry(map, &priv->freeable_maps, next) { + if (in_range(map, start, end)) { + ret = -EAGAIN; + goto out_unlock; + } unmap_if_in_range(map, start, end); } + +out_unlock: mutex_unlock(&priv->lock); + + return ret; } static void mn_release(struct mmu_notifier *mn, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7c7362dd2faa..0205aee44ded 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1289,8 +1289,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, } #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ -void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end); +int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end, bool blockable); #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu); diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 392e6af82701..133ba78820ee 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -151,13 +151,15 @@ struct mmu_notifier_ops { * address space but may still be referenced by sptes until * the last refcount is dropped. * - * If both of these callbacks cannot block, and invalidate_range - * cannot block, mmu_notifier_ops.flags should have - * MMU_INVALIDATE_DOES_NOT_BLOCK set. + * If blockable argument is set to false then the callback cannot + * sleep and has to return with -EAGAIN. 0 should be returned + * otherwise. + * */ - void (*invalidate_range_start)(struct mmu_notifier *mn, + int (*invalidate_range_start)(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, + bool blockable); void (*invalidate_range_end)(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end); @@ -229,8 +231,9 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte); -extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, - unsigned long start, unsigned long end); +extern int __mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end, + bool blockable); extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, unsigned long start, unsigned long end, bool only_end); @@ -281,7 +284,15 @@ static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) - __mmu_notifier_invalidate_range_start(mm, start, end); + __mmu_notifier_invalidate_range_start(mm, start, end, true); +} + +static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + if (mm_has_notifiers(mm)) + return __mmu_notifier_invalidate_range_start(mm, start, end, false); + return 0; } static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, @@ -461,6 +472,12 @@ static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, { } +static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + return 0; +} + static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, unsigned long start, unsigned long end) { diff --git a/include/linux/oom.h b/include/linux/oom.h index 6adac113e96d..92f70e4c6252 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -95,7 +95,7 @@ static inline int check_stable_address_space(struct mm_struct *mm) return 0; } -void __oom_reap_task_mm(struct mm_struct *mm); +bool __oom_reap_task_mm(struct mm_struct *mm); extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 6a17f856f841..381cdf5a9bd1 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -119,7 +119,8 @@ typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, */ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, u64 start, u64 end, - umem_call_back cb, void *cookie); + umem_call_back cb, + bool blockable, void *cookie); /* * Find first region intersecting with address range. diff --git a/mm/hmm.c b/mm/hmm.c index 76e7a058b32f..0b0554591610 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -177,16 +177,19 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) up_write(&hmm->mirrors_sem); } -static void hmm_invalidate_range_start(struct mmu_notifier *mn, +static int hmm_invalidate_range_start(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, - unsigned long end) + unsigned long end, + bool blockable) { struct hmm *hmm = mm->hmm; VM_BUG_ON(!hmm); atomic_inc(&hmm->sequence); + + return 0; } static void hmm_invalidate_range_end(struct mmu_notifier *mn, diff --git a/mm/mmap.c b/mm/mmap.c index 8d6449e74431..bb2a7e097c7d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3064,7 +3064,7 @@ void exit_mmap(struct mm_struct *mm) * reliably test it. */ mutex_lock(&oom_lock); - __oom_reap_task_mm(mm); + (void)__oom_reap_task_mm(mm); mutex_unlock(&oom_lock); set_bit(MMF_OOM_SKIP, &mm->flags); diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index eff6b88a993f..82bb1a939c0e 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -174,18 +174,29 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, srcu_read_unlock(&srcu, id); } -void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, - unsigned long start, unsigned long end) +int __mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end, + bool blockable) { struct mmu_notifier *mn; + int ret = 0; int id; id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { - if (mn->ops->invalidate_range_start) - mn->ops->invalidate_range_start(mn, mm, start, end); + if (mn->ops->invalidate_range_start) { + int _ret = mn->ops->invalidate_range_start(mn, mm, start, end, blockable); + if (_ret) { + pr_info("%pS callback failed with %d in %sblockable context.\n", + mn->ops->invalidate_range_start, _ret, + !blockable ? "non-" : ""); + ret = _ret; + } + } } srcu_read_unlock(&srcu, id); + + return ret; } EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 412f43453a68..be31a1e0fe78 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -487,9 +487,10 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait); static struct task_struct *oom_reaper_list; static DEFINE_SPINLOCK(oom_reaper_lock); -void __oom_reap_task_mm(struct mm_struct *mm) +bool __oom_reap_task_mm(struct mm_struct *mm) { struct vm_area_struct *vma; + bool ret = true; /* * Tell all users of get_user/copy_from_user etc... that the content @@ -519,12 +520,17 @@ void __oom_reap_task_mm(struct mm_struct *mm) struct mmu_gather tlb; tlb_gather_mmu(&tlb, mm, start, end); - mmu_notifier_invalidate_range_start(mm, start, end); + if (mmu_notifier_invalidate_range_start_nonblock(mm, start, end)) { + ret = false; + continue; + } unmap_page_range(&tlb, vma, start, end, NULL); mmu_notifier_invalidate_range_end(mm, start, end); tlb_finish_mmu(&tlb, start, end); } } + + return ret; } static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) @@ -553,18 +559,6 @@ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) goto unlock_oom; } - /* - * If the mm has invalidate_{start,end}() notifiers that could block, - * sleep to give the oom victim some more time. - * TODO: we really want to get rid of this ugly hack and make sure that - * notifiers cannot block for unbounded amount of time - */ - if (mm_has_blockable_invalidate_notifiers(mm)) { - up_read(&mm->mmap_sem); - schedule_timeout_idle(HZ); - goto unlock_oom; - } - /* * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't * work on the mm anymore. The check for MMF_OOM_SKIP must run @@ -579,7 +573,12 @@ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) trace_start_task_reaping(tsk->pid); - __oom_reap_task_mm(mm); + /* failed to reap part of the address space. Try again later */ + if (!__oom_reap_task_mm(mm)) { + up_read(&mm->mmap_sem); + ret = false; + goto unlock_oom; + } pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", task_pid_nr(tsk), tsk->comm, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9263ead9fd32..0116b449b993 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -140,9 +140,10 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); static unsigned long long kvm_createvm_count; static unsigned long long kvm_active_vms; -__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end) +__weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end, bool blockable) { + return 0; } bool kvm_is_reserved_pfn(kvm_pfn_t pfn) @@ -360,13 +361,15 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, srcu_read_unlock(&kvm->srcu, idx); } -static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, +static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, - unsigned long end) + unsigned long end, + bool blockable) { struct kvm *kvm = mmu_notifier_to_kvm(mn); int need_tlb_flush = 0, idx; + int ret; idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); @@ -384,9 +387,11 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, spin_unlock(&kvm->mmu_lock); - kvm_arch_mmu_notifier_invalidate_range(kvm, start, end); + ret = kvm_arch_mmu_notifier_invalidate_range(kvm, start, end, blockable); srcu_read_unlock(&kvm->srcu, idx); + + return ret; } static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, -- cgit v1.2.3