Diffstat (limited to 'mm/mmap_lock.c')
-rw-r--r--   mm/mmap_lock.c | 109
1 file changed, 100 insertions(+), 9 deletions(-)
diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
index b006cec8e6fe..0a0db5849b8e 100644
--- a/mm/mmap_lock.c
+++ b/mm/mmap_lock.c
@@ -128,6 +128,95 @@ void vma_mark_detached(struct vm_area_struct *vma)
}
/*
+ * Try to read-lock a vma. The function is allowed to occasionally yield a
+ * false locked result to avoid performance overhead, in which case we fall
+ * back to using mmap_lock. The function should never yield a false unlocked
+ * result. A false locked result is possible if mm_lock_seq overflows or if
+ * the vma gets reused and attached to a different mm before we lock it.
+ * Returns the vma on success, NULL on failure to lock and ERR_PTR(-EAGAIN)
+ * if the vma got detached.
+ *
+ * IMPORTANT: The RCU read lock must be held upon entering the function, but
+ * upon error IT IS RELEASED. The caller must handle this correctly.
+ */
+static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
+ struct vm_area_struct *vma)
+{
+ struct mm_struct *other_mm;
+ int oldcnt;
+
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held");
+ /*
+ * Check before locking. A race might cause a false locked result.
+ * We can use READ_ONCE() for the mm_lock_seq here, and don't need
+ * ACQUIRE semantics, because this is just a lockless check whose result
+ * we don't rely on for anything - the mm_lock_seq read against which we
+ * need ordering is below.
+ */
+ if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence)) {
+ vma = NULL;
+ goto err;
+ }
+
+ /*
+ * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
+ * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
+ * An acquire fence is required here to avoid reordering against the later
+ * vm_lock_seq check and the checks inside lock_vma_under_rcu().
+ */
+ if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
+ VMA_REF_LIMIT))) {
+ /* return EAGAIN if vma got detached from under us */
+ vma = oldcnt ? NULL : ERR_PTR(-EAGAIN);
+ goto err;
+ }
+
+ rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
+
+ if (unlikely(vma->vm_mm != mm))
+ goto err_unstable;
+
+ /*
+ * Overflow of vm_lock_seq/mm_lock_seq might produce a false locked result.
+ * A false unlocked result is impossible because we modify and check
+ * vma->vm_lock_seq under vma->vm_refcnt protection, and mm->mm_lock_seq
+ * modification invalidates all existing locks.
+ *
+ * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
+ * racing with vma_end_write_all(), we only start reading from the VMA
+ * after it has been unlocked.
+ * This pairs with RELEASE semantics in vma_end_write_all().
+ */
+ if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
+ vma_refcount_put(vma);
+ vma = NULL;
+ goto err;
+ }
+
+ return vma;
+err:
+ rcu_read_unlock();
+
+ return vma;
+err_unstable:
+ /*
+ * If vma got attached to another mm from under us, that mm is not
+ * stable and can be freed in the narrow window after vma->vm_refcnt
+ * is dropped and before rcuwait_wake_up(mm) is called. Grab it before
+ * releasing vma->vm_refcnt.
+ */
+ other_mm = vma->vm_mm; /* use a copy as the vma can be freed once we drop vm_refcnt */
+ mmgrab(other_mm);
+ vma_refcount_put(vma);
+
+ /* __mmdrop() is a heavy operation, do it after dropping the RCU lock. */
+ rcu_read_unlock();
+ mmdrop(other_mm);
+
+ return NULL;
+}
+
+/*
* Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be
* stable and not isolated. If the VMA is not found or is being modified the
* function returns NULL.
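
The __refcount_inc_not_zero_limited_acquire() step in the hunk above is what separates the three outcomes of vma_start_read(): the increment succeeds only while vm_refcnt is non-zero (the vma is attached) and below VMA_REF_LIMIT, and a writer pushes the count past that limit by adding VMA_LOCK_OFFSET. The userspace sketch below is an illustration only, not kernel code: fake_ref_get() and the FAKE_* constants are made-up stand-ins for the roles played by __refcount_inc_not_zero_limited_acquire(), VMA_LOCK_OFFSET and VMA_REF_LIMIT.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define FAKE_LOCK_OFFSET 0x40000000              /* assumed stand-in for VMA_LOCK_OFFSET */
#define FAKE_REF_LIMIT   (FAKE_LOCK_OFFSET - 1)  /* readers must stay below this */

/*
 * Take a reference only if the object is attached (count > 0) and not
 * write-locked (count <= limit); report the observed count in *oldcnt.
 */
static bool fake_ref_get(atomic_int *refcnt, int *oldcnt)
{
	int old = atomic_load_explicit(refcnt, memory_order_relaxed);

	do {
		*oldcnt = old;
		if (old <= 0 || old > FAKE_REF_LIMIT)
			return false;   /* 0: detached, above limit: write-locked */
	} while (!atomic_compare_exchange_weak_explicit(refcnt, &old, old + 1,
							memory_order_acquire,
							memory_order_relaxed));
	return true;
}

int main(void)
{
	atomic_int refcnt = 1;   /* attached, no readers, not write-locked */
	int oldcnt;
	bool ok;

	ok = fake_ref_get(&refcnt, &oldcnt);            /* succeeds */
	printf("unlocked vma:     ok=%d\n", ok);

	atomic_fetch_add(&refcnt, FAKE_LOCK_OFFSET);    /* writer write-locks */
	ok = fake_ref_get(&refcnt, &oldcnt);            /* fails, oldcnt != 0 */
	printf("write-locked vma: ok=%d oldcnt=%#x\n", ok, (unsigned int)oldcnt);

	atomic_store(&refcnt, 0);                       /* vma detached */
	ok = fake_ref_get(&refcnt, &oldcnt);            /* fails, oldcnt == 0 */
	printf("detached vma:     ok=%d oldcnt=%d\n", ok, oldcnt);
	return 0;
}

On failure, oldcnt tells the two cases apart exactly as in the hunk above: a non-zero oldcnt means the vma is write-locked (return NULL and fall back to mmap_lock), while a zero oldcnt means it was detached (return ERR_PTR(-EAGAIN)).
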
@@ -138,11 +227,13 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
MA_STATE(mas, &mm->mm_mt, address, address);
struct vm_area_struct *vma;
- rcu_read_lock();
retry:
+ rcu_read_lock();
vma = mas_walk(&mas);
- if (!vma)
+ if (!vma) {
+ rcu_read_unlock();
goto inval;
+ }
vma = vma_start_read(mm, vma);
if (IS_ERR_OR_NULL(vma)) {
@@ -162,18 +253,17 @@ retry:
* From here on, we can access the VMA without worrying about which
* fields are accessible for RCU readers.
*/
+ rcu_read_unlock();
/* Check if the vma we locked is the right one. */
- if (unlikely(address < vma->vm_start || address >= vma->vm_end))
- goto inval_end_read;
+ if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
+ vma_end_read(vma);
+ goto inval;
+ }
- rcu_read_unlock();
return vma;
-inval_end_read:
- vma_end_read(vma);
inval:
- rcu_read_unlock();
count_vm_vma_lock_event(VMA_LOCK_ABORT);
return NULL;
}
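
The vm_lock_seq check in vma_start_read() above, and its ACQUIRE/RELEASE pairing with vma_end_write_all(), can be illustrated with a similar userspace analogue. Again this is only a sketch: the fake_* names are made up, the real seqcount_t machinery is compressed into one plain atomic counter, and the single-threaded demo glosses over the READ_ONCE()/refcount protection the kernel code relies on.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* One sequence counter per "mm", one stamped copy per "vma". */
static atomic_uint fake_mm_lock_seq;
static unsigned int fake_vma_lock_seq = ~0u;    /* ~0u: never write-locked */

/* Writer side: stamp the current mm sequence into the vma (mmap_lock held). */
static void fake_vma_start_write(void)
{
	fake_vma_lock_seq = atomic_load_explicit(&fake_mm_lock_seq,
						 memory_order_relaxed);
}

/* vma_end_write_all() analogue: one bump invalidates every stamped vma. */
static void fake_vma_end_write_all(void)
{
	/* RELEASE pairs with the reader's ACQUIRE below. */
	atomic_fetch_add_explicit(&fake_mm_lock_seq, 1, memory_order_release);
}

/* Reader side: "locked" means the vma stamp matches the current mm sequence. */
static bool fake_vma_is_write_locked(void)
{
	return fake_vma_lock_seq == atomic_load_explicit(&fake_mm_lock_seq,
							 memory_order_acquire);
}

int main(void)
{
	fake_vma_start_write();
	printf("while write-locked: %d\n", fake_vma_is_write_locked());  /* 1 */
	fake_vma_end_write_all();
	printf("after unlock:       %d\n", fake_vma_is_write_locked());  /* 0 */
	return 0;
}

A stale stamp can only compare equal to the current sequence again after the counter wraps around, which is why a reader may occasionally see a false "locked" result (and fall back to mmap_lock) but never a false "unlocked" one.
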
@@ -228,6 +318,7 @@ retry:
*/
if (PTR_ERR(vma) == -EAGAIN) {
/* reset to search from the last address */
+ rcu_read_lock();
vma_iter_set(vmi, from_addr);
goto retry;
}
@@ -257,9 +348,9 @@ retry:
return vma;
fallback_unlock:
+ rcu_read_unlock();
vma_end_read(vma);
fallback:
- rcu_read_unlock();
vma = lock_next_vma_under_mmap_lock(mm, vmi, from_addr);
rcu_read_lock();
/* Reinitialize the iterator after re-entering rcu read section */
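
Taken together, the hunks above establish a new convention for per-VMA read locking: the caller enters the RCU read section, looks the VMA up, and calls vma_start_read(); on any failure the RCU lock has already been dropped, while on success the caller drops RCU itself once the VMA is pinned and later releases the lock with vma_end_read(). The sketch below is a hypothetical, simplified caller written against that convention (all identifiers come from the diff above); it is not part of the patch and omits the retry and mmap_lock fallback logic that lock_vma_under_rcu() and lock_next_vma() implement.

/* Hypothetical caller sketch, not part of this patch. */
static struct vm_area_struct *example_lock_vma(struct mm_struct *mm,
					       unsigned long address)
{
	MA_STATE(mas, &mm->mm_mt, address, address);
	struct vm_area_struct *vma;

	rcu_read_lock();
	vma = mas_walk(&mas);
	if (!vma) {
		rcu_read_unlock();
		return NULL;
	}

	vma = vma_start_read(mm, vma);
	if (IS_ERR_OR_NULL(vma)) {
		/* RCU read lock already dropped by vma_start_read() */
		return NULL;
	}

	/* The read lock pins the VMA; RCU is no longer needed. */
	rcu_read_unlock();

	if (address < vma->vm_start || address >= vma->vm_end) {
		vma_end_read(vma);
		return NULL;
	}

	return vma;     /* caller releases with vma_end_read() when done */
}
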