diff options
| author | Josh Snyder <joshs@netflix.com> | 2017-12-18 16:15:10 +0000 | 
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2018-01-16 03:29:36 +0100 | 
| commit | c96f5471ce7d2aefd0dda560cc23f08ab00bc65d (patch) | |
| tree | 84feeca0e7b0819b55216e1fea50a3e0e79d445b /kernel | |
| parent | a8750ddca918032d6349adbf9a4b6555e7db20da (diff) | |
delayacct: Account blkio completion on the correct task
Before commit:
  e33a9bba85a8 ("sched/core: move IO scheduling accounting from io_schedule_timeout() into scheduler")
delayacct_blkio_end() was called after context-switching into the task which
completed I/O.
This resulted in double counting: the task would account a delay both waiting
for I/O and for time spent in the runqueue.
With e33a9bba85a8, delayacct_blkio_end() is called by try_to_wake_up().
In ttwu, we have not yet context-switched. This is more correct, in that
the delay accounting ends when the I/O is complete.
But delayacct_blkio_end() relies on 'get_current()', and we have not yet
context-switched into the task whose I/O completed. This results in the
wrong task having its delay accounting statistics updated.
Instead of doing that, pass the task_struct being woken to delayacct_blkio_end(),
so that it can update the statistics of the correct task.
Signed-off-by: Josh Snyder <joshs@netflix.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Cc: <stable@vger.kernel.org>
Cc: Brendan Gregg <bgregg@netflix.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-block@vger.kernel.org
Fixes: e33a9bba85a8 ("sched/core: move IO scheduling accounting from io_schedule_timeout() into scheduler")
Link: http://lkml.kernel.org/r/1513613712-571-1-git-send-email-joshs@netflix.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/delayacct.c | 42 | ||||
| -rw-r--r-- | kernel/sched/core.c | 6 | 
2 files changed, 29 insertions, 19 deletions
diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 4a1c33416b6a..e2764d767f18 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -51,16 +51,16 @@ void __delayacct_tsk_init(struct task_struct *tsk)   * Finish delay accounting for a statistic using its timestamps (@start),   * accumalator (@total) and @count   */ -static void delayacct_end(u64 *start, u64 *total, u32 *count) +static void delayacct_end(spinlock_t *lock, u64 *start, u64 *total, u32 *count)  {  	s64 ns = ktime_get_ns() - *start;  	unsigned long flags;  	if (ns > 0) { -		spin_lock_irqsave(&current->delays->lock, flags); +		spin_lock_irqsave(lock, flags);  		*total += ns;  		(*count)++; -		spin_unlock_irqrestore(&current->delays->lock, flags); +		spin_unlock_irqrestore(lock, flags);  	}  } @@ -69,17 +69,25 @@ void __delayacct_blkio_start(void)  	current->delays->blkio_start = ktime_get_ns();  } -void __delayacct_blkio_end(void) +/* + * We cannot rely on the `current` macro, as we haven't yet switched back to + * the process being woken. 
+ */ +void __delayacct_blkio_end(struct task_struct *p)  { -	if (current->delays->flags & DELAYACCT_PF_SWAPIN) -		/* Swapin block I/O */ -		delayacct_end(&current->delays->blkio_start, -			&current->delays->swapin_delay, -			&current->delays->swapin_count); -	else	/* Other block I/O */ -		delayacct_end(&current->delays->blkio_start, -			&current->delays->blkio_delay, -			&current->delays->blkio_count); +	struct task_delay_info *delays = p->delays; +	u64 *total; +	u32 *count; + +	if (p->delays->flags & DELAYACCT_PF_SWAPIN) { +		total = &delays->swapin_delay; +		count = &delays->swapin_count; +	} else { +		total = &delays->blkio_delay; +		count = &delays->blkio_count; +	} + +	delayacct_end(&delays->lock, &delays->blkio_start, total, count);  }  int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) @@ -153,8 +161,10 @@ void __delayacct_freepages_start(void)  void __delayacct_freepages_end(void)  { -	delayacct_end(&current->delays->freepages_start, -			&current->delays->freepages_delay, -			&current->delays->freepages_count); +	delayacct_end( +		&current->delays->lock, +		&current->delays->freepages_start, +		&current->delays->freepages_delay, +		&current->delays->freepages_count);  } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 644fa2e3d993..a7bf32aabfda 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2056,7 +2056,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)  	p->state = TASK_WAKING;  	if (p->in_iowait) { -		delayacct_blkio_end(); +		delayacct_blkio_end(p);  		atomic_dec(&task_rq(p)->nr_iowait);  	} @@ -2069,7 +2069,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)  #else /* CONFIG_SMP */  	if (p->in_iowait) { -		delayacct_blkio_end(); +		delayacct_blkio_end(p);  		atomic_dec(&task_rq(p)->nr_iowait);  	} @@ -2122,7 +2122,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf)  	if (!task_on_rq_queued(p)) {  		if (p->in_iowait) { -			delayacct_blkio_end(); +			delayacct_blkio_end(p);  			
atomic_dec(&rq->nr_iowait);  		}  		ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK);  | 
