Diffstat (limited to 'kernel/bpf/rqspinlock.c')
-rw-r--r--  kernel/bpf/rqspinlock.c | 90
1 file changed, 46 insertions(+), 44 deletions(-)
diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c
index a00561b1d3e5..f7d0c8d4644e 100644
--- a/kernel/bpf/rqspinlock.c
+++ b/kernel/bpf/rqspinlock.c
@@ -89,15 +89,14 @@ struct rqspinlock_timeout {
DEFINE_PER_CPU_ALIGNED(struct rqspinlock_held, rqspinlock_held_locks);
EXPORT_SYMBOL_GPL(rqspinlock_held_locks);
-static bool is_lock_released(rqspinlock_t *lock, u32 mask, struct rqspinlock_timeout *ts)
+static bool is_lock_released(rqspinlock_t *lock, u32 mask)
{
if (!(atomic_read_acquire(&lock->val) & (mask)))
return true;
return false;
}
-static noinline int check_deadlock_AA(rqspinlock_t *lock, u32 mask,
- struct rqspinlock_timeout *ts)
+static noinline int check_deadlock_AA(rqspinlock_t *lock)
{
struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
int cnt = min(RES_NR_HELD, rqh->cnt);
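The body of check_deadlock_AA() falls outside the hunk context, but it only consults this CPU's held-locks table, which is why the mask and timeout arguments can be dropped. A rough sketch of its shape, reconstructed from the fields visible in this hunk rather than quoted from the source; the top entry is skipped because it corresponds to the acquisition currently in progress:

        /* Sketch: AA deadlock means this CPU already holds the lock it is
         * now attempting, so scan our own held-locks entries (excluding the
         * freshly grabbed top entry for this attempt).
         */
        for (int i = 0; i < cnt - 1; i++) {
                if (rqh->locks[i] == lock)
                        return -EDEADLK;
        }
        return 0;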
@@ -118,8 +117,7 @@ static noinline int check_deadlock_AA(rqspinlock_t *lock, u32 mask,
* more locks, which reduce to ABBA). This is not exhaustive, and we rely on
* timeouts as the final line of defense.
*/
-static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask,
- struct rqspinlock_timeout *ts)
+static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask)
{
struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
int rqh_cnt = min(RES_NR_HELD, rqh->cnt);
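For readers unfamiliar with the terminology, the ABBA case the comment refers to is the classic two-CPU, two-lock cycle. A minimal illustrative example follows; the lock names and the raw_res_spin_lock()/raw_res_spin_unlock() wrappers are assumptions made for illustration and are not part of this patch:

        static rqspinlock_t lock_a, lock_b;     /* hypothetical locks, for illustration */

        /* Runs on CPU 0: takes A, then B. */
        static void cpu0_path(void)
        {
                if (raw_res_spin_lock(&lock_a))
                        return;
                /* May fail with -EDEADLK if CPU 1 holds B and is waiting on A. */
                if (raw_res_spin_lock(&lock_b)) {
                        raw_res_spin_unlock(&lock_a);
                        return;
                }
                raw_res_spin_unlock(&lock_b);
                raw_res_spin_unlock(&lock_a);
        }

        /* Runs on CPU 1: takes B, then A, closing the cycle. */
        static void cpu1_path(void)
        {
                if (raw_res_spin_lock(&lock_b))
                        return;
                /* May fail with -EDEADLK if CPU 0 holds A and is waiting on B. */
                if (raw_res_spin_lock(&lock_a)) {
                        raw_res_spin_unlock(&lock_b);
                        return;
                }
                raw_res_spin_unlock(&lock_a);
                raw_res_spin_unlock(&lock_b);
        }

check_deadlock_ABBA() detects this cycle by scanning the other CPUs' held-locks tables, which is why, as the check_timeout() hunk further down shows, it is only re-run roughly once per millisecond, while the cheap AA check is performed once up front.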
@@ -142,7 +140,7 @@ static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask,
* Let's ensure to break out of this loop if the lock is available for
* us to potentially acquire.
*/
- if (is_lock_released(lock, mask, ts))
+ if (is_lock_released(lock, mask))
return 0;
/*
@@ -198,33 +196,21 @@ static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask,
return 0;
}
-static noinline int check_deadlock(rqspinlock_t *lock, u32 mask,
- struct rqspinlock_timeout *ts)
-{
- int ret;
-
- ret = check_deadlock_AA(lock, mask, ts);
- if (ret)
- return ret;
- ret = check_deadlock_ABBA(lock, mask, ts);
- if (ret)
- return ret;
-
- return 0;
-}
-
static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
struct rqspinlock_timeout *ts)
{
- u64 time = ktime_get_mono_fast_ns();
u64 prev = ts->cur;
+ u64 time;
if (!ts->timeout_end) {
- ts->cur = time;
- ts->timeout_end = time + ts->duration;
+ if (check_deadlock_AA(lock))
+ return -EDEADLK;
+ ts->cur = ktime_get_mono_fast_ns();
+ ts->timeout_end = ts->cur + ts->duration;
return 0;
}
+ time = ktime_get_mono_fast_ns();
if (time > ts->timeout_end)
return -ETIMEDOUT;
@@ -234,7 +220,7 @@ static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
*/
if (prev + NSEC_PER_MSEC < time) {
ts->cur = time;
- return check_deadlock(lock, mask, ts);
+ return check_deadlock_ABBA(lock, mask);
}
return 0;
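Stitching the two hunks above together, the restructured check_timeout() reads roughly as follows. The lines elided by the diff context are filled in from the surrounding code and the comments are descriptive rather than quoted, so treat this as a reconstruction, not the authoritative post-patch source:

        static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
                                          struct rqspinlock_timeout *ts)
        {
                u64 prev = ts->cur;
                u64 time;

                if (!ts->timeout_end) {
                        /* First invocation: run AA detection once, then arm the timeout. */
                        if (check_deadlock_AA(lock))
                                return -EDEADLK;
                        ts->cur = ktime_get_mono_fast_ns();
                        ts->timeout_end = ts->cur + ts->duration;
                        return 0;
                }

                time = ktime_get_mono_fast_ns();
                if (time > ts->timeout_end)
                        return -ETIMEDOUT;

                /* Re-run the more expensive ABBA scan at most once per millisecond. */
                if (prev + NSEC_PER_MSEC < time) {
                        ts->cur = time;
                        return check_deadlock_ABBA(lock, mask);
                }

                return 0;
        }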
@@ -278,6 +264,10 @@ int __lockfunc resilient_tas_spin_lock(rqspinlock_t *lock)
int val, ret = 0;
RES_INIT_TIMEOUT(ts);
+ /*
+ * The fast path is not invoked for the TAS fallback, so we must grab
+ * the deadlock detection entry here.
+ */
grab_held_lock_entry(lock);
/*
@@ -400,10 +390,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
goto queue;
}
- /*
- * Grab an entry in the held locks array, to enable deadlock detection.
- */
- grab_held_lock_entry(lock);
+ /* Deadlock detection entry already held after failing fast path. */
/*
* We're pending, wait for the owner to go away.
@@ -450,12 +437,21 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
* queuing.
*/
queue:
- lockevent_inc(lock_slowpath);
/*
- * Grab deadlock detection entry for the queue path.
+ * Do not queue if we're a waiter and someone is attempting this lock on
+ * the same CPU. In case of NMIs, this prevents long timeouts where we
+ * interrupt the pending waiter, and the owner, that will eventually
+ * signal the head of our queue, both of which are logically but not
+ * physically part of the queue, hence outside the scope of the idx > 0
+ * check above for the trylock fallback.
*/
- grab_held_lock_entry(lock);
+ if (check_deadlock_AA(lock)) {
+ ret = -EDEADLK;
+ goto err_release_entry;
+ }
+ lockevent_inc(lock_slowpath);
+ /* Deadlock detection entry already held after failing fast path. */
node = this_cpu_ptr(&rqnodes[0].mcs);
idx = node->count++;
tail = encode_tail(smp_processor_id(), idx);
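The AA check can catch the NMI-vs-waiter case described above because the deadlock-detection entry is deposited into the per-CPU table before the lock is actually acquired, so a nested attempt on the same CPU sees the interrupted attempt's entry. A rough, illustrative sketch of that bookkeeping helper (the real helper is defined elsewhere in the rqspinlock sources; this shows the idea, not the exact body):

        static __always_inline void grab_held_lock_entry(void *lock)
        {
                int cnt = this_cpu_inc_return(rqspinlock_held_locks.cnt);

                /* Table full: keep the count bumped so release stays balanced. */
                if (unlikely(cnt > RES_NR_HELD))
                        return;
                this_cpu_write(rqspinlock_held_locks.locks[cnt - 1], lock);
        }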
@@ -467,19 +463,17 @@ queue:
* not be nested NMIs taking spinlocks. That may not be true in
* some architectures even though the chance of needing more than
* 4 nodes will still be extremely unlikely. When that happens,
- * we fall back to spinning on the lock directly without using
- * any MCS node. This is not the most elegant solution, but is
- * simple enough.
+ * we fall back to attempting a trylock operation without using
+ * any MCS node. Unlike qspinlock which cannot fail, we have the
+ * option of failing the slow path, and under contention, such a
+ * trylock spinning will likely be treated unfairly due to lack of
+ * queueing, hence do not spin.
*/
- if (unlikely(idx >= _Q_MAX_NODES || in_nmi())) {
+ if (unlikely(idx >= _Q_MAX_NODES || (in_nmi() && idx > 0))) {
lockevent_inc(lock_no_node);
- RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
- while (!queued_spin_trylock(lock)) {
- if (RES_CHECK_TIMEOUT(ts, ret, ~0u)) {
- lockevent_inc(rqspinlock_lock_timeout);
- goto err_release_node;
- }
- cpu_relax();
+ if (!queued_spin_trylock(lock)) {
+ ret = -EDEADLK;
+ goto err_release_node;
}
goto release;
}
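Since the no-node fallback now fails immediately instead of spinning on trylock with a timeout, the error simply propagates to the caller, which is what makes failing the slow path an acceptable option. A minimal usage sketch, assuming the raw_res_spin_lock()/raw_res_spin_unlock() wrappers from the rqspinlock header; the lock and function names are hypothetical and not part of this diff:

        static rqspinlock_t demo_lock;  /* hypothetical lock, for illustration */

        static int demo_critical_section(void)
        {
                int ret = raw_res_spin_lock(&demo_lock);

                /* -EDEADLK or -ETIMEDOUT: give up instead of hanging. */
                if (ret)
                        return ret;
                /* ... work under the lock ... */
                raw_res_spin_unlock(&demo_lock);
                return 0;
        }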
@@ -540,7 +534,7 @@ queue:
val = arch_mcs_spin_lock_contended(&node->locked);
if (val == RES_TIMEOUT_VAL) {
- ret = -EDEADLK;
+ ret = -ETIMEDOUT;
goto waitq_timeout;
}
@@ -575,6 +569,14 @@ queue:
val = res_atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK) ||
RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK));
+ /* Disable queue destruction when we detect deadlocks. */
+ if (ret == -EDEADLK) {
+ if (!next)
+ next = smp_cond_load_relaxed(&node->next, (VAL));
+ arch_mcs_spin_unlock_contended(&next->locked);
+ goto err_release_node;
+ }
+
waitq_timeout:
if (ret) {
/*